| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,3 +12,4 @@ | |
|
|
||
| export BUILD_FOR_GPU=1 | ||
| export TEST_ON_GPU=1 | ||
| export TEST_PYCLARAGENOMICSANALYSIS=1 | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,20 @@ | ||
| # | ||
| # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. | ||
| # | ||
| # NVIDIA CORPORATION and its licensors retain all intellectual property | ||
| # and proprietary rights in and to this software, related documentation | ||
| # and any modifications thereto. Any use, reproduction, disclosure or | ||
| # distribution of this software and related documentation without an express | ||
| # license agreement from NVIDIA CORPORATION is strictly prohibited. | ||
| # | ||
|
|
||
| # Check CUDA dependency for project. | ||
| find_package(CUDA 9.0 REQUIRED) | ||
|
|
||
| if(NOT ${CUDA_FOUND}) | ||
| message(FATAL_ERROR "CUDA not detected on system. Please install") | ||
| else() | ||
| message(STATUS "Using CUDA ${CUDA_VERSION} from ${CUDA_TOOLKIT_ROOT_DIR}") | ||
| set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -lineinfo -use_fast_math -Xcompiler -Wall,-Wno-pedantic") | ||
| endif() | ||
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,111 @@ | ||
| /* | ||
| * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. | ||
| * | ||
| * NVIDIA CORPORATION and its licensors retain all intellectual property | ||
| * and proprietary rights in and to this software, related documentation | ||
| * and any modifications thereto. Any use, reproduction, disclosure or | ||
| * distribution of this software and related documentation without an express | ||
| * license agreement from NVIDIA CORPORATION is strictly prohibited. | ||
| */ | ||
|
|
||
| #pragma once | ||
| /// \file | ||
| /// \defgroup cudautils Internal CUDA utilities package | ||
|
|
||
| #include <claragenomics/utils/cudaversions.hpp> | ||
| #include <claragenomics/logging/logging.hpp> | ||
|
|
||
| #include <cuda_runtime_api.h> | ||
| #include <stdexcept> | ||
|
|
||
| /// \ingroup cudautils | ||
| /// \{ | ||
|
|
||
| /// \ingroup cudautils | ||
| /// \def CGA_CU_CHECK_ERR | ||
| /// \brief Log on CUDA error in enclosed expression | ||
| #define CGA_CU_CHECK_ERR(ans) \ | ||
| { \ | ||
| claragenomics::cudautils::gpu_assert((ans), __FILE__, __LINE__); \ | ||
| } | ||
|
|
||
| /// \} | ||
|
|
||
| namespace claragenomics | ||
| { | ||
|
|
||
| namespace cudautils | ||
| { | ||
|
|
||
| /// gpu_assert | ||
| /// Logs and/or exits on cuda error | ||
| /// \ingroup cudautils | ||
| /// \param code The CUDA status code of the function being asserted | ||
| /// \param file Filename of the calling function | ||
| /// \param line File line number of the calling function | ||
| /// \param abort If true, hard-exit on CUDA error | ||
| inline void gpu_assert(cudaError_t code, const char* file, int line, bool abort = false) | ||
| { | ||
| if (code != cudaSuccess) | ||
| { | ||
| std::string err = "GPU Error:: " + | ||
| std::string(cudaGetErrorString(code)) + | ||
| " " + std::string(file) + | ||
| " " + std::to_string(line); | ||
| if (abort) | ||
| { | ||
| CGA_LOG_ERROR("{}\n", err); | ||
| std::abort(); | ||
| } | ||
| else | ||
| { | ||
| throw std::runtime_error(err); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /// align | ||
| /// Alignment of memory chunks in cudapoa. Must be a power of two | ||
| /// \tparam IntType type of data to align | ||
| /// \tparam boundary Boundary to align to (NOTE: must be power of 2) | ||
| /// \param value Input value that is to be aligned | ||
| /// \return Value aligned to boundary | ||
| template <typename IntType, int32_t boundary> | ||
| __host__ __device__ __forceinline__ | ||
| IntType | ||
| align(const IntType& value) | ||
| { | ||
| static_assert((boundary & (boundary - 1)) == 0, "Boundary for align must be power of 2"); | ||
| return (value + boundary) & ~(boundary - 1); | ||
| } | ||
|
|
||
| } // namespace cudautils | ||
|
|
||
| /// \brief A class to switch the CUDA device for the current scope using RAII | ||
| /// | ||
| /// This class takes a CUDA device during construction, | ||
| /// switches to the given device using cudaSetDevice, | ||
| /// and switches back to the CUDA device which was current before the switch on destruction. | ||
| class scoped_device_switch | ||
| { | ||
| public: | ||
| explicit scoped_device_switch(int32_t device_id) | ||
| { | ||
| CGA_CU_CHECK_ERR(cudaGetDevice(&device_id_before_)); | ||
| CGA_CU_CHECK_ERR(cudaSetDevice(device_id)); | ||
| } | ||
|
|
||
| ~scoped_device_switch() | ||
| { | ||
| cudaSetDevice(device_id_before_); | ||
| } | ||
|
|
||
| scoped_device_switch() = delete; | ||
| scoped_device_switch(scoped_device_switch const&) = delete; | ||
| scoped_device_switch& operator=(scoped_device_switch const&) = delete; | ||
|
|
||
| private: | ||
| int32_t device_id_before_; | ||
| }; | ||
|
|
||
| } // namespace claragenomics |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,30 @@ | ||
| /* | ||
| * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. | ||
| * | ||
| * NVIDIA CORPORATION and its licensors retain all intellectual property | ||
| * and proprietary rights in and to this software, related documentation | ||
| * and any modifications thereto. Any use, reproduction, disclosure or | ||
| * distribution of this software and related documentation without an express | ||
| * license agreement from NVIDIA CORPORATION is strictly prohibited. | ||
| */ | ||
|
|
||
| #pragma once | ||
| /// \def CGA_CUDA_BEFORE_XX_X | ||
| /// \brief Macros to enable/disable CUDA version specific code | ||
| #define CGA_CUDA_BEFORE_XX_X 1 | ||
|
|
||
| #if __CUDACC_VER_MAJOR__ < 10 || (__CUDACC_VER_MAJOR__ == 10 && __CUDACC_VER_MINOR__ < 1) | ||
| #define CGA_CUDA_BEFORE_10_1 | ||
| #endif | ||
|
|
||
| #if (__CUDACC_VER_MAJOR__ == 9 && __CUDACC_VER_MINOR__ < 2) | ||
| #define CGA_CUDA_BEFORE_9_2 | ||
| #endif | ||
|
|
||
| /// \def CGA_CONSTEXPR | ||
| /// \brief C++ constexpr for device code - falls back to const for CUDA 10.0 and earlier | ||
| #ifdef CGA_CUDA_BEFORE_10_1 | ||
| #define CGA_CONSTEXPR const | ||
| #else | ||
| #define CGA_CONSTEXPR constexpr | ||
| #endif |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,91 @@ | ||
| /* | ||
| * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. | ||
| * | ||
| * NVIDIA CORPORATION and its licensors retain all intellectual property | ||
| * and proprietary rights in and to this software, related documentation | ||
| * and any modifications thereto. Any use, reproduction, disclosure or | ||
| * distribution of this software and related documentation without an express | ||
| * license agreement from NVIDIA CORPORATION is strictly prohibited. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <claragenomics/utils/cudautils.hpp> | ||
| #include <exception> | ||
| #include <utility> | ||
| #include <cassert> | ||
| #ifndef NDEBUG | ||
| #include <limits> | ||
| #endif | ||
|
|
||
| namespace claragenomics | ||
| { | ||
|
|
||
| class device_memory_allocation_exception : public std::exception | ||
| { | ||
| public: | ||
| device_memory_allocation_exception() = default; | ||
| device_memory_allocation_exception(device_memory_allocation_exception const&) = default; | ||
| device_memory_allocation_exception& operator=(device_memory_allocation_exception const&) = default; | ||
| virtual ~device_memory_allocation_exception() = default; | ||
|
|
||
| virtual const char* what() const noexcept | ||
| { | ||
| return "Could not allocate device memory!"; | ||
| } | ||
| }; | ||
|
|
||
| template <typename T> | ||
| class device_buffer | ||
| { | ||
| public: | ||
| using value_type = T; | ||
|
|
||
| device_buffer() = default; | ||
|
|
||
| explicit device_buffer(size_t n_elements) | ||
| : size_(n_elements) | ||
| { | ||
| cudaError_t err = cudaMalloc(reinterpret_cast<void**>(&data_), size_ * sizeof(T)); | ||
| if (err == cudaErrorMemoryAllocation) | ||
| throw device_memory_allocation_exception(); | ||
| CGA_CU_CHECK_ERR(err); | ||
| } | ||
|
|
||
| device_buffer(device_buffer const&) = delete; | ||
| device_buffer& operator=(device_buffer const&) = delete; | ||
|
|
||
| device_buffer(device_buffer&& r) | ||
| : data_(std::exchange(r.data_, nullptr)), size_(std::exchange(r.size_, 0)) | ||
| { | ||
| } | ||
|
|
||
| device_buffer& operator=(device_buffer&& r) | ||
| { | ||
| data_ = std::exchange(r.data_, nullptr); | ||
| size_ = std::exchange(r.size_, 0); | ||
| return *this; | ||
| } | ||
|
|
||
| ~device_buffer() | ||
| { | ||
| cudaFree(data_); | ||
| } | ||
|
|
||
| value_type* data() { return data_; } | ||
| value_type const* data() const { return data_; } | ||
| size_t size() const { return size_; } | ||
|
|
||
| private: | ||
| value_type* data_ = nullptr; | ||
| size_t size_ = 0; | ||
| }; | ||
|
|
||
| template <typename T> | ||
| void device_memset_async(device_buffer<T>& buffer, int value, cudaStream_t stream) | ||
| { | ||
| assert(value <= std::numeric_limits<unsigned char>::max()); | ||
| CGA_CU_CHECK_ERR(cudaMemsetAsync(buffer.data(), value, sizeof(typename device_buffer<T>::value_type) * buffer.size(), stream)); | ||
| } | ||
|
|
||
| } // end namespace claragenomics |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11,6 +11,7 @@ | |
| #pragma once | ||
|
|
||
| #include <random> | ||
| #include <stdexcept> | ||
|
|
||
| namespace claragenomics | ||
| { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,40 @@ | ||
| /* | ||
| * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. | ||
| * | ||
| * NVIDIA CORPORATION and its licensors retain all intellectual property | ||
| * and proprietary rights in and to this software, related documentation | ||
| * and any modifications thereto. Any use, reproduction, disclosure or | ||
| * distribution of this software and related documentation without an express | ||
| * license agreement from NVIDIA CORPORATION is strictly prohibited. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <claragenomics/utils/cudaversions.hpp> | ||
| #include <limits> | ||
| #include <cstdint> | ||
|
|
||
| namespace claragenomics | ||
| { | ||
| #ifdef CGA_CUDA_BEFORE_10_1 | ||
| template <typename T> | ||
| struct numeric_limits | ||
| { | ||
| }; | ||
|
|
||
| template <> | ||
| struct numeric_limits<int16_t> | ||
| { | ||
| CGA_CONSTEXPR static __device__ int16_t max() { return INT16_MAX; } | ||
| }; | ||
|
|
||
| template <> | ||
| struct numeric_limits<int32_t> | ||
| { | ||
| CGA_CONSTEXPR static __device__ int32_t max() { return INT32_MAX; } | ||
| }; | ||
| #else | ||
| using std::numeric_limits; | ||
| #endif | ||
|
|
||
| } // end namespace claragenomics |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,42 @@ | ||
| /* | ||
| * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. | ||
| * | ||
| * NVIDIA CORPORATION and its licensors retain all intellectual property | ||
| * and proprietary rights in and to this software, related documentation | ||
| * and any modifications thereto. Any use, reproduction, disclosure or | ||
| * distribution of this software and related documentation without an express | ||
| * license agreement from NVIDIA CORPORATION is strictly prohibited. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <cassert> | ||
| #include <type_traits> | ||
| #include <cuda_runtime_api.h> | ||
| #ifndef __CUDA_ARCH__ | ||
| #include <algorithm> | ||
| #endif | ||
|
|
||
| namespace claragenomics | ||
| { | ||
|
|
||
| template <typename Integer> | ||
| __host__ __device__ constexpr inline Integer ceiling_divide(Integer i, Integer j) | ||
| { | ||
| static_assert(std::is_integral<Integer>::value, "Arguments have to be integer types."); | ||
| assert(i >= 0); | ||
| assert(j > 0); | ||
| return (i + j - 1) / j; | ||
| } | ||
|
|
||
| template <typename T> | ||
| __host__ __device__ inline T const& min3(T const& t1, T const& t2, T const& t3) | ||
| { | ||
| #ifdef __CUDA_ARCH__ | ||
| return min(t1, min(t2, t3)); | ||
| #else | ||
| return std::min(t1, std::min(t2, t3)); | ||
| #endif | ||
| } | ||
|
|
||
| } // namespace claragenomics |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,63 @@ | ||
| /* | ||
| * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. | ||
| * | ||
| * NVIDIA CORPORATION and its licensors retain all intellectual property | ||
| * and proprietary rights in and to this software, related documentation | ||
| * and any modifications thereto. Any use, reproduction, disclosure or | ||
| * distribution of this software and related documentation without an express | ||
| * license agreement from NVIDIA CORPORATION is strictly prohibited. | ||
| */ | ||
|
|
||
| #include "aligner_global_hirschberg_myers.hpp" | ||
| #include "hirschberg_myers_gpu.cuh" | ||
| #include "batched_device_matrices.cuh" | ||
|
|
||
| #include <claragenomics/utils/mathutils.hpp> | ||
|
|
||
| namespace claragenomics | ||
| { | ||
|
|
||
| namespace cudaaligner | ||
| { | ||
|
|
||
| static constexpr int32_t hirschberg_myers_stackbuffer_size = 32 * 64; | ||
|
|
||
| struct AlignerGlobalHirschbergMyers::Workspace | ||
| { | ||
| static constexpr int32_t n_cols = 256; // has to be at least 2 (hirschberg fwd + hirschberg bwd). | ||
| Workspace(int32_t max_alignments, int32_t max_n_words, int32_t max_target_length, cudaStream_t stream) | ||
| : stackbuffer(max_alignments * hirschberg_myers_stackbuffer_size), pvs(max_alignments, max_n_words * n_cols, stream), mvs(max_alignments, max_n_words * n_cols, stream), scores(max_alignments, std::max(max_n_words * n_cols, (max_target_length + 1) * 2), stream), query_patterns(max_alignments, max_n_words * 8, stream) | ||
| { | ||
| } | ||
| device_buffer<char> stackbuffer; | ||
| batched_device_matrices<hirschbergmyers::WordType> pvs; | ||
| batched_device_matrices<hirschbergmyers::WordType> mvs; | ||
| batched_device_matrices<int32_t> scores; | ||
| batched_device_matrices<hirschbergmyers::WordType> query_patterns; | ||
| }; | ||
|
|
||
| AlignerGlobalHirschbergMyers::AlignerGlobalHirschbergMyers(int32_t max_query_length, int32_t max_target_length, int32_t max_alignments, cudaStream_t stream, int32_t device_id) | ||
| : AlignerGlobal(max_query_length, max_target_length, max_alignments, stream, device_id) | ||
| { | ||
| scoped_device_switch dev(device_id); | ||
| workspace_ = std::make_unique<Workspace>(max_alignments, ceiling_divide<int32_t>(max_query_length, sizeof(hirschbergmyers::WordType)), max_target_length, stream); | ||
| } | ||
|
|
||
| AlignerGlobalHirschbergMyers::~AlignerGlobalHirschbergMyers() | ||
| { | ||
| // Keep empty destructor to keep Workspace type incomplete in the .hpp file. | ||
| } | ||
|
|
||
| void AlignerGlobalHirschbergMyers::run_alignment(int8_t* results_d, int32_t* result_lengths_d, int32_t max_result_length, | ||
| const char* sequences_d, int32_t* sequence_lengths_d, int32_t* sequence_lengths_h, int32_t max_sequence_length, | ||
| int32_t num_alignments, cudaStream_t stream) | ||
| { | ||
| static_cast<void>(sequence_lengths_h); | ||
| hirschberg_myers_gpu(workspace_->stackbuffer, hirschberg_myers_stackbuffer_size, results_d, result_lengths_d, max_result_length, | ||
| sequences_d, sequence_lengths_d, max_sequence_length, num_alignments, | ||
| workspace_->pvs, workspace_->mvs, workspace_->scores, workspace_->query_patterns, | ||
| stream); | ||
| } | ||
|
|
||
| } // namespace cudaaligner | ||
| } // namespace claragenomics |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,38 @@ | ||
| /* | ||
| * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. | ||
| * | ||
| * NVIDIA CORPORATION and its licensors retain all intellectual property | ||
| * and proprietary rights in and to this software, related documentation | ||
| * and any modifications thereto. Any use, reproduction, disclosure or | ||
| * distribution of this software and related documentation without an express | ||
| * license agreement from NVIDIA CORPORATION is strictly prohibited. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include "aligner_global.hpp" | ||
|
|
||
| namespace claragenomics | ||
| { | ||
|
|
||
| namespace cudaaligner | ||
| { | ||
|
|
||
| class AlignerGlobalHirschbergMyers : public AlignerGlobal | ||
| { | ||
| public: | ||
| AlignerGlobalHirschbergMyers(int32_t max_query_length, int32_t max_target_length, int32_t max_alignments, cudaStream_t stream, int32_t device_id); | ||
| virtual ~AlignerGlobalHirschbergMyers(); | ||
|
|
||
| private: | ||
| struct Workspace; | ||
|
|
||
| virtual void run_alignment(int8_t* results_d, int32_t* result_lengths_d, int32_t max_result_length, | ||
| const char* sequences_d, int32_t* sequence_lengths_d, int32_t* sequence_lengths_h, int32_t max_sequence_length, | ||
| int32_t num_alignments, cudaStream_t stream) override; | ||
|
|
||
| std::unique_ptr<Workspace> workspace_; | ||
| }; | ||
|
|
||
| } // namespace cudaaligner | ||
| } // namespace claragenomics |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,59 @@ | ||
| /* | ||
| * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. | ||
| * | ||
| * NVIDIA CORPORATION and its licensors retain all intellectual property | ||
| * and proprietary rights in and to this software, related documentation | ||
| * and any modifications thereto. Any use, reproduction, disclosure or | ||
| * distribution of this software and related documentation without an express | ||
| * license agreement from NVIDIA CORPORATION is strictly prohibited. | ||
| */ | ||
|
|
||
| #include "aligner_global_myers.hpp" | ||
| #include "myers_gpu.cuh" | ||
| #include "batched_device_matrices.cuh" | ||
|
|
||
| #include <claragenomics/utils/mathutils.hpp> | ||
|
|
||
| namespace claragenomics | ||
| { | ||
|
|
||
| namespace cudaaligner | ||
| { | ||
|
|
||
| struct AlignerGlobalMyers::Workspace | ||
| { | ||
| Workspace(int32_t max_alignments, int32_t max_n_words, int32_t max_target_length, cudaStream_t stream) | ||
| : pvs(max_alignments, max_n_words * (max_target_length + 1), stream), mvs(max_alignments, max_n_words * (max_target_length + 1), stream), scores(max_alignments, max_n_words * (max_target_length + 1), stream), query_patterns(max_alignments, max_n_words * 4, stream) | ||
| { | ||
| } | ||
| batched_device_matrices<myers::WordType> pvs; | ||
| batched_device_matrices<myers::WordType> mvs; | ||
| batched_device_matrices<int32_t> scores; | ||
| batched_device_matrices<myers::WordType> query_patterns; | ||
| }; | ||
|
|
||
| AlignerGlobalMyers::AlignerGlobalMyers(int32_t max_query_length, int32_t max_target_length, int32_t max_alignments, cudaStream_t stream, int32_t device_id) | ||
| : AlignerGlobal(max_query_length, max_target_length, max_alignments, stream, device_id), workspace_() | ||
| { | ||
| scoped_device_switch dev(device_id); | ||
| workspace_ = std::make_unique<Workspace>(max_alignments, ceiling_divide<int32_t>(max_query_length, sizeof(myers::WordType)), max_target_length, stream); | ||
| } | ||
|
|
||
| AlignerGlobalMyers::~AlignerGlobalMyers() | ||
| { | ||
| // Keep empty destructor to keep Workspace type incomplete in the .hpp file. | ||
| } | ||
|
|
||
| void AlignerGlobalMyers::run_alignment(int8_t* results_d, int32_t* result_lengths_d, int32_t max_result_length, | ||
| const char* sequences_d, int32_t* sequence_lengths_d, int32_t* sequence_lengths_h, int32_t max_sequence_length, | ||
| int32_t num_alignments, cudaStream_t stream) | ||
| { | ||
| static_cast<void>(sequence_lengths_h); | ||
| myers_gpu(results_d, result_lengths_d, max_result_length, | ||
| sequences_d, sequence_lengths_d, max_sequence_length, num_alignments, | ||
| workspace_->pvs, workspace_->mvs, workspace_->scores, workspace_->query_patterns, | ||
| stream); | ||
| } | ||
|
|
||
| } // namespace cudaaligner | ||
| } // namespace claragenomics |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,38 @@ | ||
| /* | ||
| * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. | ||
| * | ||
| * NVIDIA CORPORATION and its licensors retain all intellectual property | ||
| * and proprietary rights in and to this software, related documentation | ||
| * and any modifications thereto. Any use, reproduction, disclosure or | ||
| * distribution of this software and related documentation without an express | ||
| * license agreement from NVIDIA CORPORATION is strictly prohibited. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include "aligner_global.hpp" | ||
|
|
||
| namespace claragenomics | ||
| { | ||
|
|
||
| namespace cudaaligner | ||
| { | ||
|
|
||
| class AlignerGlobalMyers : public AlignerGlobal | ||
| { | ||
| public: | ||
| AlignerGlobalMyers(int32_t max_query_length, int32_t max_target_length, int32_t max_alignments, cudaStream_t stream, int32_t device_id); | ||
| virtual ~AlignerGlobalMyers(); | ||
|
|
||
| private: | ||
| struct Workspace; | ||
|
|
||
| virtual void run_alignment(int8_t* results_d, int32_t* result_lengths_d, int32_t max_result_length, | ||
| const char* sequences_d, int32_t* sequence_lengths_d, int32_t* sequence_lengths_h, int32_t max_sequence_length, | ||
| int32_t num_alignments, cudaStream_t stream) override; | ||
|
|
||
| std::unique_ptr<Workspace> workspace_; | ||
| }; | ||
|
|
||
| } // namespace cudaaligner | ||
| } // namespace claragenomics |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,71 @@ | ||
| /* | ||
| * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. | ||
| * | ||
| * NVIDIA CORPORATION and its licensors retain all intellectual property | ||
| * and proprietary rights in and to this software, related documentation | ||
| * and any modifications thereto. Any use, reproduction, disclosure or | ||
| * distribution of this software and related documentation without an express | ||
| * license agreement from NVIDIA CORPORATION is strictly prohibited. | ||
| */ | ||
|
|
||
| #include "aligner_global_ukkonen.hpp" | ||
| #include "ukkonen_gpu.cuh" | ||
| #include "batched_device_matrices.cuh" | ||
|
|
||
| namespace claragenomics | ||
| { | ||
|
|
||
| namespace cudaaligner | ||
| { | ||
|
|
||
| static constexpr float max_target_query_length_difference = 0.1; // query has to be >=90% of target length | ||
|
|
||
| AlignerGlobalUkkonen::AlignerGlobalUkkonen(int32_t max_query_length, int32_t max_target_length, int32_t max_alignments, cudaStream_t stream, int32_t device_id) | ||
| : AlignerGlobal(max_query_length, max_target_length, max_alignments, stream, device_id), score_matrices_(), ukkonen_p_(100) | ||
| { | ||
| scoped_device_switch dev(device_id); | ||
| int32_t const allocated_max_length_difference = this->get_max_target_length() * max_target_query_length_difference; | ||
| score_matrices_ = std::make_unique<batched_device_matrices<nw_score_t>>( | ||
| max_alignments, | ||
| ukkonen_max_score_matrix_size(this->get_max_query_length(), this->get_max_target_length(), allocated_max_length_difference, ukkonen_p_), | ||
| stream); | ||
| } | ||
|
|
||
| AlignerGlobalUkkonen::~AlignerGlobalUkkonen() | ||
| { | ||
| // Keep empty destructor to keep batched_device_matrices type incomplete in the .hpp file. | ||
| } | ||
|
|
||
| StatusType AlignerGlobalUkkonen::add_alignment(const char* query, int32_t query_length, const char* target, int32_t target_length) | ||
| { | ||
| int32_t const allocated_max_length_difference = this->get_max_target_length() * max_target_query_length_difference; | ||
| if (std::abs(query_length - target_length) > allocated_max_length_difference) | ||
| { | ||
| CGA_LOG_DEBUG("{} {}", "Exceeded maximum length difference b/w target and query allowed : ", allocated_max_length_difference); | ||
| return StatusType::exceeded_max_alignment_difference; | ||
| } | ||
|
|
||
| return BaseType::add_alignment(query, query_length, target, target_length); | ||
| } | ||
|
|
||
| void AlignerGlobalUkkonen::run_alignment(int8_t* results_d, int32_t* result_lengths_d, int32_t max_result_length, | ||
| const char* sequences_d, int32_t* sequence_lengths_d, int32_t* sequence_lengths_h, int32_t max_sequence_length, | ||
| int32_t num_alignments, cudaStream_t stream) | ||
| { | ||
| int32_t max_length_difference = 0; | ||
| for (int32_t i = 0; i < num_alignments; ++i) | ||
| { | ||
| max_length_difference = std::max(max_length_difference, | ||
| std::abs(sequence_lengths_h[2 * i] - sequence_lengths_h[2 * i + 1])); | ||
| } | ||
|
|
||
| ukkonen_gpu(results_d, result_lengths_d, max_result_length, | ||
| sequences_d, sequence_lengths_d, | ||
| max_length_difference, max_sequence_length, num_alignments, | ||
| score_matrices_.get(), | ||
| ukkonen_p_, | ||
| stream); | ||
| } | ||
|
|
||
| } // namespace cudaaligner | ||
| } // namespace claragenomics |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,44 @@ | ||
| /* | ||
| * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. | ||
| * | ||
| * NVIDIA CORPORATION and its licensors retain all intellectual property | ||
| * and proprietary rights in and to this software, related documentation | ||
| * and any modifications thereto. Any use, reproduction, disclosure or | ||
| * distribution of this software and related documentation without an express | ||
| * license agreement from NVIDIA CORPORATION is strictly prohibited. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include "aligner_global.hpp" | ||
| #include "ukkonen_gpu.cuh" | ||
|
|
||
| namespace claragenomics | ||
| { | ||
|
|
||
| namespace cudaaligner | ||
| { | ||
|
|
||
| template <typename T> | ||
| class batched_device_matrices; | ||
|
|
||
| class AlignerGlobalUkkonen : public AlignerGlobal | ||
| { | ||
| public: | ||
| AlignerGlobalUkkonen(int32_t max_query_length, int32_t max_target_length, int32_t max_alignments, cudaStream_t stream, int32_t device_id); | ||
| virtual ~AlignerGlobalUkkonen(); | ||
| StatusType add_alignment(const char* query, int32_t query_length, const char* target, int32_t target_length) override; | ||
|
|
||
| private: | ||
| using BaseType = AlignerGlobal; | ||
|
|
||
| virtual void run_alignment(int8_t* results_d, int32_t* result_lengths_d, int32_t max_result_length, | ||
| const char* sequences_d, int32_t* sequence_lengths_d, int32_t* sequence_lengths_h, int32_t max_sequence_length, | ||
| int32_t num_alignments, cudaStream_t stream) override; | ||
|
|
||
| std::unique_ptr<batched_device_matrices<nw_score_t>> score_matrices_; | ||
| int32_t ukkonen_p_; | ||
| }; | ||
|
|
||
| } // namespace cudaaligner | ||
| } // namespace claragenomics |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,40 @@ | ||
| /* | ||
| * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. | ||
| * | ||
| * NVIDIA CORPORATION and its licensors retain all intellectual property | ||
| * and proprietary rights in and to this software, related documentation | ||
| * and any modifications thereto. Any use, reproduction, disclosure or | ||
| * distribution of this software and related documentation without an express | ||
| * license agreement from NVIDIA CORPORATION is strictly prohibited. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <cuda_runtime_api.h> | ||
| #include "batched_device_matrices.cuh" | ||
|
|
||
| namespace claragenomics | ||
| { | ||
| namespace cudaaligner | ||
| { | ||
|
|
||
| namespace hirschbergmyers | ||
| { | ||
| using WordType = uint32_t; | ||
| using nw_score_t = int32_t; | ||
| } // namespace hirschbergmyers | ||
|
|
||
| void hirschberg_myers_gpu(device_buffer<char>& stack_buffer, int32_t stacksize_per_alignment, | ||
| int8_t* paths_d, int32_t* path_lengths_d, int32_t max_path_length, | ||
| char const* sequences_d, | ||
| int32_t const* sequence_lengths_d, | ||
| int32_t max_target_query_length, | ||
| int32_t n_alignments, | ||
| batched_device_matrices<hirschbergmyers::WordType>& pv, | ||
| batched_device_matrices<hirschbergmyers::WordType>& mv, | ||
| batched_device_matrices<int32_t>& score, | ||
| batched_device_matrices<hirschbergmyers::WordType>& query_patterns, | ||
| cudaStream_t stream); | ||
|
|
||
| } // end namespace cudaaligner | ||
| } // end namespace claragenomics |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,124 @@ | ||
| /* | ||
| * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. | ||
| * | ||
| * NVIDIA CORPORATION and its licensors retain all intellectual property | ||
| * and proprietary rights in and to this software, related documentation | ||
| * and any modifications thereto. Any use, reproduction, disclosure or | ||
| * distribution of this software and related documentation without an express | ||
| * license agreement from NVIDIA CORPORATION is strictly prohibited. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <claragenomics/utils/signed_integer_utils.hpp> | ||
|
|
||
| #include <cassert> | ||
| #include <climits> | ||
| #include <vector> | ||
|
|
||
| namespace claragenomics | ||
| { | ||
|
|
||
| namespace cudaaligner | ||
| { | ||
|
|
||
| using WordType = uint32_t; | ||
|
|
||
| int32_t myers_advance_block(WordType hmask, int32_t carry_in, WordType eq, WordType& pv, WordType& mv) | ||
| { | ||
| assert((pv & mv) == WordType(0)); | ||
|
|
||
| // Stage 1 | ||
| WordType xv = eq | mv; | ||
| if (carry_in < 0) | ||
| eq |= WordType(1); | ||
| WordType xh = (((eq & pv) + pv) ^ pv) | eq; | ||
| WordType ph = mv | (~(xh | pv)); | ||
| WordType mh = pv & xh; | ||
|
|
||
| int32_t carry_out = ((ph & hmask) == WordType(0) ? 0 : 1) - ((mh & hmask) == WordType(0) ? 0 : 1); | ||
|
|
||
| ph <<= 1; | ||
| mh <<= 1; | ||
|
|
||
| if (carry_in < 0) | ||
| mh |= WordType(1); | ||
|
|
||
| if (carry_in > 0) | ||
| ph |= WordType(1); | ||
|
|
||
| // Stage 2 | ||
| pv = mh | (~(xv | ph)); | ||
| mv = ph & xv; | ||
|
|
||
| return carry_out; | ||
| } | ||
|
|
||
| WordType myers_preprocess(char x, std::string const& query, int32_t offset) | ||
| { | ||
| assert(offset < get_size(query)); | ||
| const int32_t max_i = (std::min)(get_size(query) - offset, static_cast<int64_t>(sizeof(WordType) * CHAR_BIT)); | ||
| WordType r = 0; | ||
| for (int32_t i = 0; i < max_i; ++i) | ||
| { | ||
| if (x == query[i + offset]) | ||
| r = r | (WordType(1) << i); | ||
| } | ||
| return r; | ||
| } | ||
|
|
||
| int32_t myers_compute_edit_distance(std::string const& target, std::string const& query) | ||
| { | ||
| constexpr int32_t word_size = sizeof(WordType) * CHAR_BIT; | ||
| const int32_t query_size = get_size(query); | ||
|
|
||
| if (query_size == 0) | ||
| return get_size(target); | ||
|
|
||
| const int32_t n_words = ceiling_divide(query_size, word_size); | ||
|
|
||
| std::vector<WordType> pv(n_words, ~WordType(0)); | ||
| std::vector<WordType> mv(n_words, 0); | ||
| std::vector<int32_t> score(n_words); | ||
| for (int32_t i = 0; i < n_words; ++i) | ||
| { | ||
| score[i] = (std::min)((i + 1) * word_size, query_size); | ||
| } | ||
|
|
||
| for (const char t : target) | ||
| { | ||
| int32_t carry = 0; | ||
| for (int32_t i = 0; i < n_words; ++i) | ||
| { | ||
| const WordType peq_a = myers_preprocess('A', query, i * word_size); | ||
| const WordType peq_c = myers_preprocess('C', query, i * word_size); | ||
| const WordType peq_g = myers_preprocess('G', query, i * word_size); | ||
| const WordType peq_t = myers_preprocess('T', query, i * word_size); | ||
| const WordType hmask = WordType(1) << (i < (n_words - 1) ? word_size - 1 : query_size - (n_words - 1) * word_size - 1); | ||
|
|
||
| const WordType eq = [peq_a, peq_c, peq_g, peq_t](char x) -> WordType { | ||
| assert(x == 'A' || x == 'C' || x == 'G' || x == 'T'); | ||
| switch (x) | ||
| { | ||
| case 'A': | ||
| return peq_a; | ||
| case 'C': | ||
| return peq_c; | ||
| case 'G': | ||
| return peq_g; | ||
| case 'T': | ||
| return peq_t; | ||
| default: | ||
| return 0; | ||
| } | ||
| }(t); | ||
|
|
||
| carry = myers_advance_block(hmask, carry, eq, pv[i], mv[i]); | ||
| score[i] += carry; | ||
| } | ||
| } | ||
| return score.back(); | ||
| } | ||
|
|
||
| } // namespace cudaaligner | ||
| } // namespace claragenomics |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,43 @@ | ||
| /* | ||
| * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. | ||
| * | ||
| * NVIDIA CORPORATION and its licensors retain all intellectual property | ||
| * and proprietary rights in and to this software, related documentation | ||
| * and any modifications thereto. Any use, reproduction, disclosure or | ||
| * distribution of this software and related documentation without an express | ||
| * license agreement from NVIDIA CORPORATION is strictly prohibited. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include "matrix_cpu.hpp" | ||
| #include "batched_device_matrices.cuh" | ||
|
|
||
| #include <cuda_runtime_api.h> | ||
|
|
||
| namespace claragenomics | ||
| { | ||
| namespace cudaaligner | ||
| { | ||
|
|
||
| namespace myers | ||
| { | ||
| using WordType = uint32_t; | ||
| } | ||
|
|
||
| int32_t myers_compute_edit_distance(std::string const& target, std::string const& query); | ||
| matrix<int32_t> myers_get_full_score_matrix(std::string const& target, std::string const& query); | ||
|
|
||
| void myers_gpu(int8_t* paths_d, int32_t* path_lengths_d, int32_t max_path_length, | ||
| char const* sequences_d, | ||
| int32_t const* sequence_lengths_d, | ||
| int32_t max_target_query_length, | ||
| int32_t n_alignments, | ||
| batched_device_matrices<myers::WordType>& pv, | ||
| batched_device_matrices<myers::WordType>& mv, | ||
| batched_device_matrices<int32_t>& score, | ||
| batched_device_matrices<myers::WordType>& query_patterns, | ||
| cudaStream_t stream); | ||
|
|
||
| } // end namespace cudaaligner | ||
| } // end namespace claragenomics |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,178 @@ | ||
| /* | ||
| * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. | ||
| * | ||
| * NVIDIA CORPORATION and its licensors retain all intellectual property | ||
| * and proprietary rights in and to this software, related documentation | ||
| * and any modifications thereto. Any use, reproduction, disclosure or | ||
| * distribution of this software and related documentation without an express | ||
| * license agreement from NVIDIA CORPORATION is strictly prohibited. | ||
| */ | ||
|
|
||
| #include "needleman_wunsch_cpu.hpp" | ||
|
|
||
| #include <claragenomics/cudaaligner/cudaaligner.hpp> | ||
| #include <claragenomics/utils/mathutils.hpp> | ||
|
|
||
| #include <tuple> | ||
| #include <cassert> | ||
| #include <algorithm> | ||
|
|
||
| namespace claragenomics | ||
| { | ||
|
|
||
| namespace cudaaligner | ||
| { | ||
|
|
||
| int find_alignment_position(matrix<int> const& scores) | ||
| { | ||
| int const last_i = scores.num_rows() - 1; | ||
| int min_score = std::numeric_limits<int>::max(); | ||
| int best_pos = 0; | ||
| for (int j = 0; j < scores.num_cols(); ++j) | ||
| { | ||
| if (scores(last_i, j) < min_score) | ||
| { | ||
| min_score = scores(last_i, j); | ||
| best_pos = j; | ||
| } | ||
| } | ||
| return best_pos; | ||
| } | ||
|
|
||
| std::tuple<int, std::vector<int8_t>> needleman_wunsch_backtrace_old(matrix<int> const& scores) | ||
| { | ||
| using std::get; | ||
| std::tuple<int, std::vector<int8_t>> res; | ||
| int best_pos = find_alignment_position(scores); | ||
| // | ||
| // int i = 0; | ||
| // int j = 0; | ||
| // if(best_pos < 0) | ||
| // { | ||
| // i = -best_pos; | ||
| // j = scores.num_cols()-1; | ||
| // } | ||
| // else | ||
| // { | ||
| // i = scores.num_rows()-1; | ||
| // j = best_pos; | ||
| // } | ||
| // | ||
| int i = scores.num_rows() - 1; | ||
| int j = scores.num_cols() - 1; | ||
| get<0>(res) = best_pos; | ||
| get<1>(res).reserve(std::max(scores.num_rows(), scores.num_cols())); | ||
| int myscore = scores(i, j); | ||
| while (i > 0 && j > 0) | ||
| { | ||
| int8_t r = 0; | ||
| int const above = scores(i - 1, j); | ||
| int const diag = scores(i - 1, j - 1); | ||
| int const left = scores(i, j - 1); | ||
| if (left + 1 == myscore) | ||
| { | ||
| r = static_cast<int8_t>(AlignmentState::insertion); | ||
| myscore = left; | ||
| --j; | ||
| } | ||
| else if (above + 1 == myscore) | ||
| { | ||
| r = static_cast<int8_t>(AlignmentState::deletion); | ||
| myscore = above; | ||
| --i; | ||
| } | ||
| else | ||
| { | ||
| r = (diag == myscore ? static_cast<int8_t>(AlignmentState::match) : static_cast<int8_t>(AlignmentState::mismatch)); | ||
| myscore = diag; | ||
| --i; | ||
| --j; | ||
| } | ||
| get<1>(res).push_back(r); | ||
| } | ||
| while (i > 0) | ||
| { | ||
| get<1>(res).push_back(static_cast<int8_t>(AlignmentState::deletion)); | ||
| --i; | ||
| } | ||
| while (j > 0) | ||
| { | ||
| get<1>(res).push_back(static_cast<int8_t>(AlignmentState::insertion)); | ||
| --j; | ||
| } | ||
| reverse(get<1>(res).begin(), get<1>(res).end()); | ||
| return res; | ||
| } | ||
|
|
||
| matrix<int> needleman_wunsch_build_score_matrix_naive(std::string const& text, std::string const& query) | ||
| { | ||
| int const text_size = text.size() + 1; | ||
| int const query_size = query.size() + 1; | ||
| matrix<int> scores(query_size, text_size); | ||
|
|
||
| for (int i = 0; i < query_size; ++i) | ||
| scores(i, 0) = i; | ||
| for (int j = 0; j < text_size; ++j) | ||
| scores(0, j) = j; | ||
|
|
||
| for (int j = 1; j < text_size; ++j) | ||
| for (int i = 1; i < query_size; ++i) | ||
| { | ||
| scores(i, j) = min3( | ||
| scores(i - 1, j) + 1, | ||
| scores(i, j - 1) + 1, | ||
| scores(i - 1, j - 1) + (query[i - 1] == text[j - 1] ? 0 : 1)); | ||
| } | ||
|
|
||
| return scores; | ||
| } | ||
|
|
||
| matrix<int> needleman_wunsch_build_score_matrix_diagonal(std::string const& text, std::string const& query) | ||
| { | ||
| int const query_size = query.size() + 1; | ||
| int const text_size = text.size() + 1; | ||
| assert(query_size <= text_size); | ||
| matrix<int> scores(query_size, text_size); | ||
|
|
||
| for (int i = 0; i < query_size; ++i) | ||
| scores(i, 0) = i; | ||
| for (int j = 0; j < text_size; ++j) | ||
| scores(0, j) = j; | ||
|
|
||
| for (int k = 1; k < query_size; ++k) | ||
| { | ||
| int const jmax = std::min(k, text_size); | ||
| for (int j = 1; j < jmax; ++j) | ||
| { | ||
| int const i = k - j; | ||
| scores(i, j) = min3( | ||
| scores(i - 1, j) + 1, | ||
| scores(i, j - 1) + 1, | ||
| scores(i - 1, j - 1) + (query[i - 1] == text[j - 1] ? 0 : 1)); | ||
| } | ||
| } | ||
|
|
||
| for (int l = 1; l < text_size; ++l) | ||
| { | ||
| int const imax = std::min(text_size - l, query_size - 1); | ||
| for (int k = 0; k < imax; ++k) | ||
| { | ||
| int const i = query_size - k - 1; | ||
| int const j = l + k; | ||
| scores(i, j) = min3( | ||
| scores(i - 1, j) + 1, | ||
| scores(i, j - 1) + 1, | ||
| scores(i - 1, j - 1) + (query[i - 1] == text[j - 1] ? 0 : 1)); | ||
| } | ||
| } | ||
| return scores; | ||
| } | ||
|
|
||
| std::vector<int8_t> needleman_wunsch_cpu(std::string const& text, std::string const& query) | ||
| { | ||
| matrix<int> scores = needleman_wunsch_build_score_matrix_naive(text, query); | ||
| return std::get<1>(needleman_wunsch_backtrace_old(scores)); | ||
| } | ||
|
|
||
| } // namespace cudaaligner | ||
| } // namespace claragenomics |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11,6 +11,7 @@ | |
| #pragma once | ||
|
|
||
| #include <cuda_runtime_api.h> | ||
| #include <cstdint> | ||
|
|
||
| namespace claragenomics | ||
| { | ||
|
|
||