diff --git a/CMakeLists.txt b/CMakeLists.txt index 9948d173..59e0ceb1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -81,6 +81,7 @@ option(MATX_EN_CUDA_LINEINFO "Enable line information for CUDA kernels via -line option(MATX_EN_EXTENDED_LAMBDA "Enable extended lambda support for device/host lambdas" ON) option(MATX_EN_MATHDX "Enable MathDx support for kernel fusion" OFF) option(MATX_EN_UNSAFE_ALIAS_DETECTION "Enable aliased memory detection" OFF) +option(MATX_DISABLE_EXCEPTIONS "Disable C++ exceptions and log errors instead" OFF) set(MATX_EN_PYBIND11 OFF CACHE BOOL "Enable pybind11 support") @@ -206,6 +207,9 @@ if (MATX_NVTX_FLAGS) add_definitions(-DMATX_NVTX_FLAGS) target_compile_definitions(matx INTERFACE MATX_NVTX_FLAGS) endif() +if (MATX_DISABLE_EXCEPTIONS) + target_compile_definitions(matx INTERFACE MATX_DISABLE_EXCEPTIONS) +endif() if (MATX_BUILD_32_BIT) set(MATX_NVPL_INT_TYPE "lp64") target_compile_definitions(matx INTERFACE MATX_INDEX_32_BIT) diff --git a/docs_input/build.rst b/docs_input/build.rst index 519cec8f..60790249 100644 --- a/docs_input/build.rst +++ b/docs_input/build.rst @@ -201,6 +201,8 @@ By default, all of these options are OFF. - ``-DMATX_EN_MATHDX=ON`` * - Enable pybind11 Support. This option is usually not explicitly set, but is enabled by other options. 
- ``-DMATX_EN_PYBIND11=ON`` + * - Disable Exceptions + - ``-DMATX_DISABLE_EXCEPTIONS=ON`` NVTX Flags diff --git a/examples/black_scholes.cu b/examples/black_scholes.cu index 77e872cd..38996b82 100644 --- a/examples/black_scholes.cu +++ b/examples/black_scholes.cu @@ -150,16 +150,15 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv) index_t input_size = 100'000'000; constexpr uint32_t num_iterations = 100; float time_ms; - - tensor_t K_tensor{{input_size}}; - tensor_t S_tensor{{input_size}}; - tensor_t V_tensor{{input_size}}; - tensor_t r_tensor{{input_size}}; - tensor_t T_tensor{{input_size}}; - tensor_t output_tensor{{input_size}}; - tensor_t output_tensor2{{input_size}}; - tensor_t output_tensor3{{input_size}}; - tensor_t output_tensor4{{input_size}}; + auto K_tensor = make_tensor({input_size}); + auto S_tensor = make_tensor({input_size}); + auto V_tensor = make_tensor({input_size}); + auto r_tensor = make_tensor({input_size}); + auto T_tensor = make_tensor({input_size}); + auto output_tensor = make_tensor({input_size}); + auto output_tensor2 = make_tensor({input_size}); + auto output_tensor3 = make_tensor({input_size}); + auto output_tensor4 = make_tensor({input_size}); (K_tensor = random({input_size}, UNIFORM)).run(); (S_tensor = random({input_size}, UNIFORM)).run(); @@ -171,8 +170,6 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv) cudaStreamCreate(&stream); cudaExecutor exec{stream}; - //compute_black_scholes_matx(K_tensor, S_tensor, V_tensor, r_tensor, T_tensor, output_tensor, exec); - cudaEvent_t start, stop; cudaEventCreate(&start); cudaEventCreate(&stop); diff --git a/include/matx/core/allocator.h b/include/matx/core/allocator.h index d374bdf8..5e43a795 100644 --- a/include/matx/core/allocator.h +++ b/include/matx/core/allocator.h @@ -43,6 +43,7 @@ #include "matx/core/error.h" #include "matx/core/nvtx.h" +#include "matx/core/log.h" #include #include @@ -122,6 +123,9 @@ struct MemTracker { size_t bytes = 
iter->second.size; + MATX_LOG_DEBUG("Deallocating memory: ptr={}, {} bytes, space={}, remaining={} bytes", + ptr, bytes, static_cast(iter->second.kind), matxMemoryStats.currentBytesAllocated - bytes); + matxMemoryStats.currentBytesAllocated -= bytes; switch (iter->second.kind) { @@ -187,6 +191,8 @@ struct MemTracker { } } + MATX_LOG_DEBUG("Allocating memory: {} bytes, space={}, stream={}", bytes, static_cast(space), reinterpret_cast(stream)); + switch (space) { case MATX_MANAGED_MEMORY: err = cudaMallocManaged(ptr, bytes); @@ -214,6 +220,8 @@ struct MemTracker { MATX_THROW(matxOutOfMemory, "Failed to allocate memory"); } [[maybe_unused]] std::unique_lock lck(memory_mtx); matxMemoryStats.currentBytesAllocated += bytes; matxMemoryStats.totalBytesAllocated += bytes; + /* Logged while memory_mtx is held, after the counter update, so the running total is not read concurrently with another thread's update. */ + MATX_LOG_DEBUG("Allocated memory: ptr={}, {} bytes, total_current={} bytes", *ptr, bytes, matxMemoryStats.currentBytesAllocated); diff --git a/include/matx/core/cache.h b/include/matx/core/cache.h index 1ac94b34..0279316d 100644 --- a/include/matx/core/cache.h +++ b/include/matx/core/cache.h @@ -192,11 +192,15 @@ class matxCache_t { auto &common_params_cache = rmap[key]; auto cache_el = common_params_cache.find(params); if (cache_el == common_params_cache.end()) { + MATX_LOG_DEBUG("Cache MISS for transform: id={}, device={}, thread={}", + id, key.device_id, reinterpret_cast(std::hash{}(key.thread_id))); std::any tmp = mfun(); common_params_cache.insert({params, tmp}); efun(std::any_cast(tmp)); } else { + MATX_LOG_DEBUG("Cache HIT for transform: id={}, device={}, thread={}", + id, key.device_id, reinterpret_cast(std::hash{}(key.thread_id))); efun(std::any_cast(cache_el->second)); } } diff --git a/include/matx/core/error.h b/include/matx/core/error.h index e35ea35f..dd924932 100644 --- a/include/matx/core/error.h +++ b/include/matx/core/error.h @@ -42,6 +42,7 @@ #endif #include "matx/core/stacktrace.h" +#include "matx/core/log.h" #endif namespace matx @@ -150,6 +151,23 @@ namespace matx }; } 
+#ifdef MATX_DISABLE_EXCEPTIONS + +#define MATX_ENTER_HANDLER() { +#define MATX_EXIT_HANDLER() } +/* Exceptions disabled: MATX_THROW logs the error and a stack trace at FATAL level, then calls std::abort(). printStackTrace is fully qualified so the macro does not depend on the namespace it is expanded in. */ +#define MATX_THROW(e, str_arg) \ + do { \ + MATX_LOG_FATAL("matxException ({}: {}) - {}:{}", matxErrorString(e), str_arg, __FILE__, __LINE__); \ + std::stringstream matx_stack_trace; \ + matx::detail::printStackTrace(matx_stack_trace); \ + std::string matx_stack_str = matx_stack_trace.str(); \ + MATX_LOG_FATAL("Stack Trace:\n{}", matx_stack_str); \ + std::abort(); \ + } while(0) + +#else + #define MATX_ENTER_HANDLER() \ try \ { @@ -158,8 +176,8 @@ namespace matx } \ catch (matx::detail::matxException & e) \ { \ - fprintf(stderr, "%s\n", e.what()); \ - fprintf(stderr, "Stack Trace:\n%s", e.stack.str().c_str()); \ + MATX_LOG_FATAL("{}", e.what()); \ + MATX_LOG_FATAL("Stack Trace:\n{}", e.stack.str()); \ exit(1); \ } @@ -168,6 +186,8 @@ namespace matx throw matx::detail::matxException(e, str, __FILE__, __LINE__); \ } +#endif + #if !defined(NDEBUG) && !defined(__CUDA_ARCH__) #define MATX_ASSERT(a, error) \ { \ @@ -190,7 +210,7 @@ namespace matx auto tmp = a; \ if ((tmp != expected)) \ { \ - std::cout << #a ": " << str << "(" << tmp << " != " << expected << ")\n";\ + MATX_LOG_ERROR("{}: {} ({} != {})", #a, str, static_cast(tmp), static_cast(expected)); \ MATX_THROW(error, ""); \ } \ } @@ -217,7 +237,7 @@ namespace matx const auto e_ = (e); \ if (e_ != cudaSuccess) \ { \ - fprintf(stderr, "%s:%d CUDA Error: %s (%d)\n", __FILE__,__LINE__, cudaGetErrorString(e_), e_); \ + MATX_LOG_ERROR("{}:{} CUDA Error: {} ({})", __FILE__, __LINE__, cudaGetErrorString(e_), static_cast(e_)); \ MATX_THROW(matx::matxCudaError, cudaGetErrorString(e_)); \ } \ } while (0) @@ -239,21 +259,22 @@ namespace matx compatible = (size == 0 || size == Size(i)); \ } \ if (!compatible) { \ - std::cerr << "Incompatible operator sizes: ("; \ + std::string msg = "Incompatible operator sizes: ("; \ for (int32_t i = 0; i < Rank(); i++) { \ - std::cerr << Size(i); \ + msg += std::to_string(Size(i)); \ if (i != Rank() 
- 1) { \ - std::cerr << ","; \ + msg += ","; \ } \ } \ - std::cerr << ") not compatible with ("; \ + msg += ") not compatible with ("; \ for (int32_t i = 0; i < Rank(); i++) { \ - std::cerr << matx::detail::get_expanded_size(op, i); \ + msg += std::to_string(matx::detail::get_expanded_size(op, i)); \ if (i != Rank() - 1) { \ - std::cerr << ","; \ + msg += ","; \ } \ } \ - std::cerr << ")" << std::endl; \ + msg += ")"; \ + MATX_LOG_ERROR("{}", msg); \ MATX_THROW(matxInvalidSize, "Incompatible operator sizes"); \ } \ } diff --git a/include/matx/core/log.h b/include/matx/core/log.h index 094cffc3..5bd93841 100644 --- a/include/matx/core/log.h +++ b/include/matx/core/log.h @@ -46,6 +46,83 @@ #include #include +// Include MatX type traits and complex types for formatting support +#include "matx/core/half.h" +#include "matx/core/half_complex.h" +#include +#include + +// Helper for formatting complex types +namespace matx { +namespace detail { + // Generic helper to format any complex-like type with real() and imag() methods + template + inline std::string format_complex(const ComplexType& c) { + return std::format("({:g}{:+g}j)", + static_cast(c.real()), + static_cast(c.imag())); + } +} +} + +// Formatter specializations for all types supported by MatX +namespace std { + // Formatter for std::complex + template + struct formatter> { + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } + + template + auto format(const std::complex& c, FormatContext& ctx) const { + return format_to(ctx.out(), "{}", matx::detail::format_complex(c)); + } + }; + + // Formatter for cuda::std::complex + template + struct formatter> { + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } + + template + auto format(const cuda::std::complex& c, FormatContext& ctx) const { + return format_to(ctx.out(), "{}", matx::detail::format_complex(c)); + } + }; + + // Formatter for matxHalfComplex (fp16/bf16 complex) + template + struct formatter> { + constexpr auto 
parse(format_parse_context& ctx) { return ctx.begin(); } + + template + auto format(const matx::matxHalfComplex& c, FormatContext& ctx) const { + return format_to(ctx.out(), "{}", matx::detail::format_complex(c)); + } + }; + + // Formatter for matxFp16 (half-precision float) + template<> + struct formatter { + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } + + template + auto format(const matx::matxFp16& val, FormatContext& ctx) const { + return format_to(ctx.out(), "{:g}", static_cast(val)); + } + }; + + // Formatter for matxBf16 (bfloat16) + template<> + struct formatter { + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } + + template + auto format(const matx::matxBf16& val, FormatContext& ctx) const { + return format_to(ctx.out(), "{:g}", static_cast(val)); + } + }; +} + namespace matx { namespace detail { @@ -139,7 +216,7 @@ class Logger { std::mutex mutex_; bool show_function_; - Logger() : min_level_(LogLevel::OFF), output_stream_(&std::cout), show_function_(false) { + Logger() : min_level_(LogLevel::ERROR), output_stream_(&std::cout), show_function_(false) { // Read log level from environment const char* level_env = std::getenv("MATX_LOG_LEVEL"); if (level_env) { diff --git a/include/matx/core/make_tensor.h b/include/matx/core/make_tensor.h index f226a2df..a246547a 100644 --- a/include/matx/core/make_tensor.h +++ b/include/matx/core/make_tensor.h @@ -37,6 +37,7 @@ #include "matx/core/storage.h" #include "matx/core/tensor_desc.h" #include "matx/core/dlpack.h" +#include "matx/core/log.h" namespace matx { /** @@ -52,6 +53,15 @@ auto make_tensor( const index_t (&shape)[RANK], matxMemorySpace_t space = MATX_MANAGED_MEMORY, cudaStream_t stream = 0) { MATX_NVTX_START("", matx::MATX_NVTX_LOG_API) + + std::string shape_str = "["; + for (int i = 0; i < RANK; i++) { + if (i > 0) shape_str += ","; + shape_str += std::to_string(shape[i]); + } + shape_str += "]"; + MATX_LOG_DEBUG("make_tensor(shape, space, stream): 
shape={}, space={}, stream={}", + shape_str, static_cast(space), reinterpret_cast(stream)); DefaultDescriptor desc{shape}; auto storage = make_owning_storage(desc.TotalSize(), space, stream); @@ -69,6 +79,8 @@ template && !std::is_array_v::type>, bool> = true> auto make_tensor(Storage storage, ShapeType &&shape) { MATX_NVTX_START("", matx::MATX_NVTX_LOG_API) + + MATX_LOG_DEBUG("make_tensor(storage, shape): ptr={}", reinterpret_cast(storage.data())); constexpr int RANK = static_cast(cuda::std::tuple_size::type>::value); DefaultDescriptor desc{std::forward(shape)}; @@ -89,6 +101,15 @@ void make_tensor( TensorType &tensor, matxMemorySpace_t space = MATX_MANAGED_MEMORY, cudaStream_t stream = 0) { MATX_NVTX_START("", matx::MATX_NVTX_LOG_API) + + std::string shape_str = "["; + for (int i = 0; i < TensorType::Rank(); i++) { + if (i > 0) shape_str += ","; + shape_str += std::to_string(shape[i]); + } + shape_str += "]"; + MATX_LOG_DEBUG("make_tensor(tensor&, shape, space, stream): shape={}, space={}, stream={}", + shape_str, static_cast(space), reinterpret_cast(stream)); auto tmp = make_tensor(shape, space, stream); tensor.Shallow(tmp); @@ -108,6 +129,15 @@ auto make_tensor_p( const index_t (&shape)[RANK], matxMemorySpace_t space = MATX_MANAGED_MEMORY, cudaStream_t stream = 0) { MATX_NVTX_START("", matx::MATX_NVTX_LOG_API) + + std::string shape_str = "["; + for (int i = 0; i < RANK; i++) { + if (i > 0) shape_str += ","; + shape_str += std::to_string(shape[i]); + } + shape_str += "]"; + MATX_LOG_DEBUG("make_tensor_p(shape, space, stream): shape={}, space={}, stream={}", + shape_str, static_cast(space), reinterpret_cast(stream)); DefaultDescriptor desc{shape}; auto storage = make_owning_storage(desc.TotalSize(), space, stream); @@ -134,6 +164,9 @@ auto make_tensor( ShapeType &&shape, matxMemorySpace_t space = MATX_MANAGED_MEMORY, cudaStream_t stream = 0) { MATX_NVTX_START("", matx::MATX_NVTX_LOG_API) + + MATX_LOG_DEBUG("make_tensor(shape, space, stream): space={}, stream={}", 
+ static_cast(space), reinterpret_cast(stream)); constexpr int rank = static_cast(cuda::std::tuple_size::type>::value); DefaultDescriptor desc{std::move(shape)}; @@ -165,6 +198,9 @@ auto make_tensor( TensorType &tensor, matxMemorySpace_t space = MATX_MANAGED_MEMORY, cudaStream_t stream = 0) { MATX_NVTX_START("", matx::MATX_NVTX_LOG_API) + + MATX_LOG_DEBUG("make_tensor(tensor&, shape, space, stream): space={}, stream={}", + static_cast(space), reinterpret_cast(stream)); auto tmp = make_tensor(std::forward(shape), space, stream); tensor.Shallow(tmp); @@ -189,6 +225,9 @@ auto make_tensor_p( ShapeType &&shape, matxMemorySpace_t space = MATX_MANAGED_MEMORY, cudaStream_t stream = 0) { MATX_NVTX_START("", matx::MATX_NVTX_LOG_API) + + MATX_LOG_DEBUG("make_tensor_p(shape, space, stream): space={}, stream={}", + static_cast(space), reinterpret_cast(stream)); DefaultDescriptor(cuda::std::tuple_size::type>::value)> desc{std::move(shape)}; @@ -212,6 +251,8 @@ template auto make_tensor( [[maybe_unused]] const std::initializer_list t, matxMemorySpace_t space = MATX_MANAGED_MEMORY, cudaStream_t stream = 0) { + MATX_LOG_DEBUG("make_tensor(0D, space, stream): space={}, stream={}", + static_cast(space), reinterpret_cast(stream)); using shape_t = cuda::std::array; return make_tensor(shape_t{}, space, stream); } @@ -230,6 +271,8 @@ template (space), reinterpret_cast(stream)); auto tmp = make_tensor({}, space, stream); tensor.Shallow(tmp); } @@ -247,6 +290,8 @@ template auto make_tensor_p( [[maybe_unused]] const std::initializer_list t, matxMemorySpace_t space = MATX_MANAGED_MEMORY, cudaStream_t stream = 0) { + MATX_LOG_DEBUG("make_tensor_p(0D, space, stream): space={}, stream={}", + static_cast(space), reinterpret_cast(stream)); cuda::std::array shape; return make_tensor_p(std::move(shape), space, stream); @@ -268,6 +313,15 @@ auto make_tensor( T *data, const index_t (&shape)[RANK], bool owning = false) { MATX_NVTX_START("", matx::MATX_NVTX_LOG_API) + + std::string shape_str = "["; + 
for (int i = 0; i < RANK; i++) { + if (i > 0) shape_str += ","; + shape_str += std::to_string(shape[i]); + } + shape_str += "]"; + MATX_LOG_DEBUG("make_tensor(data, shape, owning): ptr={}, shape={}, owning={}", + reinterpret_cast(data), shape_str, owning); DefaultDescriptor desc{shape}; auto storage = owning ? make_owning_storage(desc.TotalSize()) : make_non_owning_storage(data, desc.TotalSize()); @@ -291,6 +345,15 @@ auto make_tensor( TensorType &tensor, typename TensorType::value_type *data, const index_t (&shape)[TensorType::Rank()]) { MATX_NVTX_START("", matx::MATX_NVTX_LOG_API) + + std::string shape_str = "["; + for (int i = 0; i < TensorType::Rank(); i++) { + if (i > 0) shape_str += ","; + shape_str += std::to_string(shape[i]); + } + shape_str += "]"; + MATX_LOG_DEBUG("make_tensor(tensor&, data, shape): ptr={}, shape={}", + reinterpret_cast(data), shape_str); auto tmp = make_tensor(data, shape, false); tensor.Shallow(tmp); @@ -313,6 +376,9 @@ auto make_tensor( T *data, ShapeType &&shape, bool owning = false) { MATX_NVTX_START("", matx::MATX_NVTX_LOG_API) + + MATX_LOG_DEBUG("make_tensor(data, shape, owning): ptr={}, owning={}", + reinterpret_cast(data), owning); constexpr int RANK = static_cast(cuda::std::tuple_size::type>::value); DefaultDescriptor @@ -338,6 +404,9 @@ auto make_tensor( TensorType &tensor, typename TensorType::value_type *data, typename TensorType::shape_container &&shape) { MATX_NVTX_START("", matx::MATX_NVTX_LOG_API) + + MATX_LOG_DEBUG("make_tensor(tensor&, data, shape): ptr={}", reinterpret_cast(data)); + auto tmp = make_tensor(data, std::forward(shape), false); tensor.Shallow(tmp); } @@ -356,6 +425,8 @@ template auto make_tensor( T *ptr, [[maybe_unused]] const std::initializer_list t, bool owning = false) { + MATX_LOG_DEBUG("make_tensor(ptr, 0D, owning): ptr={}, owning={}", + reinterpret_cast(ptr), owning); cuda::std::array shape; return make_tensor(ptr, std::move(shape), owning); } @@ -373,6 +444,7 @@ template , bool> = true> auto 
make_tensor( TensorType &tensor, typename TensorType::value_type *ptr) { + MATX_LOG_DEBUG("make_tensor(tensor&, ptr, 0D): ptr={}", reinterpret_cast(ptr)); auto tmp = make_tensor(ptr, false); tensor.Shallow(tmp); } @@ -396,6 +468,9 @@ auto make_tensor_p( T *const data, ShapeType &&shape, bool owning = false) { MATX_NVTX_START("", matx::MATX_NVTX_LOG_API) + + MATX_LOG_DEBUG("make_tensor_p(data, shape, owning): ptr={}, owning={}", + reinterpret_cast(data), owning); constexpr int RANK = static_cast(cuda::std::tuple_size::type>::value); DefaultDescriptor @@ -417,6 +492,14 @@ template auto make_tensor( const index_t (&shape)[RANK], Allocator&& alloc) { MATX_NVTX_START("", matx::MATX_NVTX_LOG_API) + + std::string shape_str = "["; + for (int i = 0; i < RANK; i++) { + if (i > 0) shape_str += ","; + shape_str += std::to_string(shape[i]); + } + shape_str += "]"; + MATX_LOG_DEBUG("make_tensor(shape, alloc): shape={}", shape_str); DefaultDescriptor desc{shape}; auto storage = make_owning_storage(desc.TotalSize(), std::forward(alloc)); @@ -438,6 +521,8 @@ template (shape, alloc)"); constexpr int RANK = static_cast(cuda::std::tuple_size::type>::value); DefaultDescriptor desc{std::forward(shape)}; @@ -461,6 +546,14 @@ void make_tensor( TensorType &tensor, const index_t (&shape)[TensorType::Rank()], Allocator&& alloc) { MATX_NVTX_START("", matx::MATX_NVTX_LOG_API) + + std::string shape_str = "["; + for (int i = 0; i < TensorType::Rank(); i++) { + if (i > 0) shape_str += ","; + shape_str += std::to_string(shape[i]); + } + shape_str += "]"; + MATX_LOG_DEBUG("make_tensor(tensor&, shape, alloc): shape={}", shape_str); auto tmp = make_tensor(shape, std::forward(alloc)); tensor.Shallow(tmp); @@ -483,6 +576,8 @@ void make_tensor( TensorType &tensor, ShapeType &&shape, Allocator&& alloc) { MATX_NVTX_START("", matx::MATX_NVTX_LOG_API) + + MATX_LOG_DEBUG("make_tensor(tensor&, shape, alloc)"); auto tmp = make_tensor(std::forward(shape), std::forward(alloc)); tensor.Shallow(tmp); @@ -505,6 
+600,9 @@ auto make_tensor( T* const data, D &&desc, bool owning = false) { MATX_NVTX_START("", matx::MATX_NVTX_LOG_API) + + MATX_LOG_DEBUG("make_tensor(data, desc, owning): ptr={}, owning={}", + reinterpret_cast(data), owning); using Dstrip = typename remove_cvref::type; auto storage = owning ? make_owning_storage(desc.TotalSize()) : make_non_owning_storage(data, desc.TotalSize()); @@ -528,6 +626,8 @@ auto make_tensor( TensorType &tensor, typename TensorType::value_type* const data, typename TensorType::desc_type &&desc) { MATX_NVTX_START("", matx::MATX_NVTX_LOG_API) + + MATX_LOG_DEBUG("make_tensor(tensor&, data, desc): ptr={}", reinterpret_cast(data)); // This tensor should be non-owning regardless of the original ownership since it will go out of scope at the end of the function auto tmp = make_tensor(data, std::forward(desc), false); @@ -547,6 +647,9 @@ auto make_tensor( D &&desc, matxMemorySpace_t space = MATX_MANAGED_MEMORY, cudaStream_t stream = 0) { MATX_NVTX_START("", matx::MATX_NVTX_LOG_API) + + MATX_LOG_DEBUG("make_tensor(desc, space, stream): space={}, stream={}", + static_cast(space), reinterpret_cast(stream)); using Dstrip = typename remove_cvref::type; @@ -570,6 +673,9 @@ auto make_tensor( TensorType &&tensor, matxMemorySpace_t space = MATX_MANAGED_MEMORY, cudaStream_t stream = 0) { MATX_NVTX_START("", matx::MATX_NVTX_LOG_API) + + MATX_LOG_DEBUG("make_tensor(tensor&&, desc, space, stream): space={}, stream={}", + static_cast(space), reinterpret_cast(stream)); auto tmp = make_tensor(std::forward(desc), space, stream); tensor.Shallow(tmp); @@ -594,6 +700,18 @@ auto make_tensor( T *const data, const index_t (&strides)[RANK], bool owning = false) { MATX_NVTX_START("", matx::MATX_NVTX_LOG_API) + + std::string shape_str = "["; + std::string strides_str = "["; + for (int i = 0; i < RANK; i++) { + if (i > 0) { shape_str += ","; strides_str += ","; } + shape_str += std::to_string(shape[i]); + strides_str += std::to_string(strides[i]); + } + shape_str += "]"; 
+ strides_str += "]"; + MATX_LOG_DEBUG("make_tensor(data, shape, strides, owning): ptr={}, shape={}, strides={}, owning={}", + reinterpret_cast(data), shape_str, strides_str, owning); DefaultDescriptor desc{shape, strides}; auto storage = owning ? make_owning_storage(desc.TotalSize()) : make_non_owning_storage(data, desc.TotalSize()); @@ -620,6 +738,18 @@ auto make_tensor( TensorType &tensor, const index_t (&shape)[TensorType::Rank()], const index_t (&strides)[TensorType::Rank()]) { MATX_NVTX_START("", matx::MATX_NVTX_LOG_API) + + std::string shape_str = "["; + std::string strides_str = "["; + for (int i = 0; i < TensorType::Rank(); i++) { + if (i > 0) { shape_str += ","; strides_str += ","; } + shape_str += std::to_string(shape[i]); + strides_str += std::to_string(strides[i]); + } + shape_str += "]"; + strides_str += "]"; + MATX_LOG_DEBUG("make_tensor(tensor&, data, shape, strides): ptr={}, shape={}, strides={}", + reinterpret_cast(data), shape_str, strides_str); auto tmp = make_tensor(data, shape, strides, false); tensor.Shallow(tmp); @@ -633,6 +763,8 @@ auto make_tensor( TensorType &tensor, template auto make_static_tensor() { MATX_NVTX_START("", matx::MATX_NVTX_LOG_API) + + MATX_LOG_DEBUG("make_static_tensor()"); static_tensor_desc_t desc{}; auto storage = make_owning_storage(desc.TotalSize()); @@ -644,6 +776,8 @@ template #include #include @@ -269,6 +270,9 @@ namespace matx // Helper lambda to launch kernel auto launch_kernel = [&]() { dispatch_kernel.template operator()([&](auto launch_func) { + MATX_LOG_DEBUG("Launching CUDA kernel: rank={}, blocks=({},{},{}), threads=({},{},{}), EPT={}, stream={}", + Op::Rank(), blocks.x, blocks.y, blocks.z, threads.x, threads.y, threads.z, + static_cast(EPT), reinterpret_cast(stream_)); launch_func(); }); }; diff --git a/include/matx/generators/alternate.h b/include/matx/generators/alternate.h index bf41ba78..9d185cb3 100644 --- a/include/matx/generators/alternate.h +++ b/include/matx/generators/alternate.h @@ -33,6 +33,7 
@@ #pragma once #include "matx/generators/generator1d.h" +#include "matx/core/log.h" namespace matx { @@ -47,7 +48,11 @@ namespace matx using matxop = bool; __MATX_INLINE__ std::string str() const { return "alternate"; } - __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ Alternating(index_t size) : size_(size) {}; + __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ Alternating(index_t size) : size_(size) { +#ifndef __CUDA_ARCH__ + MATX_LOG_TRACE("Alternating constructor: size={}", size); +#endif + }; template __MATX_INLINE__ __MATX_HOST__ auto get_capability([[maybe_unused]] InType &in) const { diff --git a/include/matx/generators/bartlett.h b/include/matx/generators/bartlett.h index 3351c9d9..d72d195e 100644 --- a/include/matx/generators/bartlett.h +++ b/include/matx/generators/bartlett.h @@ -33,6 +33,7 @@ #pragma once #include "matx/generators/generator1d.h" +#include "matx/core/log.h" #include namespace matx @@ -48,7 +49,11 @@ namespace matx __MATX_INLINE__ std::string str() const { return "bartlett"; } - inline __MATX_HOST__ __MATX_DEVICE__ Bartlett(index_t size) : size_(size){}; + inline __MATX_HOST__ __MATX_DEVICE__ Bartlett(index_t size) : size_(size){ +#ifndef __CUDA_ARCH__ + MATX_LOG_TRACE("Bartlett constructor: size={}", size); +#endif + }; template inline __MATX_HOST__ __MATX_DEVICE__ auto operator()(index_t i) const diff --git a/include/matx/generators/blackman.h b/include/matx/generators/blackman.h index dc4aad33..27b9b1fa 100644 --- a/include/matx/generators/blackman.h +++ b/include/matx/generators/blackman.h @@ -33,6 +33,7 @@ #pragma once #include "matx/generators/generator1d.h" +#include "matx/core/log.h" namespace matx { @@ -47,7 +48,11 @@ namespace matx __MATX_INLINE__ std::string str() const { return "blackman"; } - __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ Blackman(index_t size) : size_(size){}; + __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ Blackman(index_t size) : size_(size){ +#ifndef __CUDA_ARCH__ + MATX_LOG_TRACE("Blackman constructor: 
size={}", size); +#endif + }; template __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ auto operator()(index_t i) const diff --git a/include/matx/generators/chirp.h b/include/matx/generators/chirp.h index 768fa4e7..06f021e8 100644 --- a/include/matx/generators/chirp.h +++ b/include/matx/generators/chirp.h @@ -33,6 +33,7 @@ #pragma once #include "matx/generators/linspace.h" +#include "matx/core/log.h" namespace matx { @@ -70,7 +71,11 @@ namespace matx f1_(f1), t1_(t1), method_(method) - {} + { +#ifndef __CUDA_ARCH__ + MATX_LOG_TRACE("Chirp constructor: f0={}, f1={}, t1={}", f0, f1, t1); +#endif + } template __MATX_INLINE__ __MATX_HOST__ auto get_capability([[maybe_unused]] InType &in) const { @@ -131,7 +136,11 @@ namespace matx f1_(f1), t1_(t1), method_(method) - {} + { +#ifndef __CUDA_ARCH__ + MATX_LOG_TRACE("ComplexChirp constructor: f0={}, f1={}, t1={}", f0, f1, t1); +#endif + } template __MATX_INLINE__ __MATX_HOST__ auto get_capability([[maybe_unused]] InType &in) const { diff --git a/include/matx/generators/diag.h b/include/matx/generators/diag.h index 83600a53..b3dece46 100644 --- a/include/matx/generators/diag.h +++ b/include/matx/generators/diag.h @@ -32,6 +32,7 @@ #pragma once +#include "matx/core/log.h" namespace matx { @@ -56,6 +57,7 @@ namespace matx if constexpr (!is_noshape_v) { static_assert(Rank() > 1, "Diagonal generator must be used with an operator of rank 1 or higher"); } + MATX_LOG_TRACE("Diag constructor: rank={}, val={}", Rank(), val); }; template diff --git a/include/matx/generators/fftfreq.h b/include/matx/generators/fftfreq.h index d9d59519..d60e9dd4 100644 --- a/include/matx/generators/fftfreq.h +++ b/include/matx/generators/fftfreq.h @@ -33,6 +33,7 @@ #pragma once #include "matx/generators/generator1d.h" +#include "matx/core/log.h" namespace matx { @@ -52,6 +53,7 @@ namespace matx { n_ = n; d_ = d; + MATX_LOG_TRACE("FFTFreqOp constructor: n={}, d={}", n, d); } template diff --git a/include/matx/generators/flattop.h 
b/include/matx/generators/flattop.h index 62943b7f..295fb9a3 100644 --- a/include/matx/generators/flattop.h +++ b/include/matx/generators/flattop.h @@ -33,6 +33,7 @@ #pragma once #include "matx/generators/generator1d.h" +#include "matx/core/log.h" #include namespace matx @@ -54,7 +55,11 @@ namespace matx __MATX_INLINE__ std::string str() const { return "flattop"; } - inline __MATX_HOST__ __MATX_DEVICE__ FlatTop(index_t size) : size_(size){}; + inline __MATX_HOST__ __MATX_DEVICE__ FlatTop(index_t size) : size_(size){ +#ifndef __CUDA_ARCH__ + MATX_LOG_TRACE("FlatTop constructor: size={}", size); +#endif + }; template inline __MATX_HOST__ __MATX_DEVICE__ auto operator()(index_t i) const diff --git a/include/matx/generators/hamming.h b/include/matx/generators/hamming.h index 6a032a14..da7ef297 100644 --- a/include/matx/generators/hamming.h +++ b/include/matx/generators/hamming.h @@ -33,6 +33,7 @@ #pragma once #include "matx/generators/generator1d.h" +#include "matx/core/log.h" #include namespace matx @@ -48,7 +49,11 @@ namespace matx __MATX_INLINE__ std::string str() const { return "hamming"; } - inline __MATX_HOST__ __MATX_DEVICE__ Hamming(index_t size) : size_(size){}; + inline __MATX_HOST__ __MATX_DEVICE__ Hamming(index_t size) : size_(size){ +#ifndef __CUDA_ARCH__ + MATX_LOG_TRACE("Hamming constructor: size={}", size); +#endif + }; template inline __MATX_HOST__ __MATX_DEVICE__ auto operator()(index_t i) const diff --git a/include/matx/generators/hanning.h b/include/matx/generators/hanning.h index d40a85d0..2a291d0d 100644 --- a/include/matx/generators/hanning.h +++ b/include/matx/generators/hanning.h @@ -33,6 +33,7 @@ #pragma once #include "matx/generators/generator1d.h" +#include "matx/core/log.h" #include namespace matx @@ -48,7 +49,11 @@ namespace matx __MATX_INLINE__ std::string str() const { return "hanning"; } - inline __MATX_HOST__ __MATX_DEVICE__ Hanning(index_t size) : size_(size){}; + inline __MATX_HOST__ __MATX_DEVICE__ Hanning(index_t size) : 
size_(size){ +#ifndef __CUDA_ARCH__ + MATX_LOG_TRACE("Hanning constructor: size={}", size); +#endif + }; template inline __MATX_HOST__ __MATX_DEVICE__ auto operator()(index_t i) const diff --git a/include/matx/generators/linspace.h b/include/matx/generators/linspace.h index bd9dbadb..966e9a71 100644 --- a/include/matx/generators/linspace.h +++ b/include/matx/generators/linspace.h @@ -33,6 +33,7 @@ #pragma once #include "matx/generators/range.h" +#include "matx/core/log.h" namespace matx { @@ -60,6 +61,7 @@ namespace matx inline LinspaceOp(const T (&firsts)[NUM_RC], const T (&lasts)[NUM_RC], index_t count, int axis) { + MATX_LOG_TRACE("LinspaceOp constructor: NUM_RC={}, count={}, axis={}", NUM_RC, count, axis); axis_ = axis; count_ = count; for (int i = 0; i < NUM_RC; ++i) { diff --git a/include/matx/generators/logspace.h b/include/matx/generators/logspace.h index 45cfc2b5..12a0862b 100644 --- a/include/matx/generators/logspace.h +++ b/include/matx/generators/logspace.h @@ -33,6 +33,7 @@ #pragma once #include "matx/generators/generator1d.h" +#include "matx/core/log.h" #include namespace matx @@ -67,6 +68,7 @@ namespace matx else { range_ = Range{first, (last - first) / static_cast(count - 1)}; } + MATX_LOG_TRACE("Logspace constructor: first={}, last={}, count={}", first, last, count); #endif } diff --git a/include/matx/generators/meshgrid.h b/include/matx/generators/meshgrid.h index 67e1ba1e..9453e913 100644 --- a/include/matx/generators/meshgrid.h +++ b/include/matx/generators/meshgrid.h @@ -32,6 +32,7 @@ #pragma once #include "matx/operators/permute.h" +#include "matx/core/log.h" namespace matx { @@ -55,6 +56,7 @@ namespace matx __MATX_INLINE__ MeshGridOp(T1 t1, cuda::std::array shape, int idx) : t1_(t1), shape_(shape), idx_(idx) { static_assert(shape.size() == RANK ); static_assert(is_matx_op()); + MATX_LOG_TRACE("MeshGridOp constructor: rank={}, axis={}, idx={}", RANK, AXIS, idx); } diff --git a/include/matx/generators/random.h b/include/matx/generators/random.h 
index d8389b2e..03e1be8d 100644 --- a/include/matx/generators/random.h +++ b/include/matx/generators/random.h @@ -33,6 +33,7 @@ #pragma once #include "matx/core/error.h" +#include "matx/core/log.h" #include #include #include @@ -279,6 +280,8 @@ namespace detail { for (int i = RANK - 2; i >= 0; i--) { strides_[i] = strides_[i+1] * s[i+1]; } + + MATX_LOG_TRACE("RandomOp constructor: rank={}, total_size={}, seed={}", RANK, total_size_, seed); } template diff --git a/include/matx/generators/range.h b/include/matx/generators/range.h index 01d817eb..bcd21a61 100644 --- a/include/matx/generators/range.h +++ b/include/matx/generators/range.h @@ -33,6 +33,7 @@ #pragma once #include "matx/generators/generator1d.h" +#include "matx/core/log.h" #include namespace matx @@ -51,7 +52,9 @@ namespace matx __MATX_INLINE__ std::string str() const { return "range"; } - Range(T first, T step) : first_(first), step_(step) {} + Range(T first, T step) : first_(first), step_(step) { + MATX_LOG_TRACE("Range constructor: first={}, step={}", first, step); + } template __MATX_DEVICE__ __MATX_HOST__ __MATX_INLINE__ auto operator()(index_t idx) const diff --git a/include/matx/operators/all.h b/include/matx/operators/all.h index 9490d772..04cbdb65 100644 --- a/include/matx/operators/all.h +++ b/include/matx/operators/all.h @@ -60,6 +60,7 @@ namespace detail { __MATX_INLINE__ std::string str() const { return "all(" + get_type_str(a_) + ")"; } __MATX_INLINE__ AllOp(const OpA &a) : a_(a) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); for (int r = 0; r < ORank; r++) { out_dims_[r] = a_.Size(r); } diff --git a/include/matx/operators/ambgfun.h b/include/matx/operators/ambgfun.h index 0ca5559d..43add18f 100644 --- a/include/matx/operators/ambgfun.h +++ b/include/matx/operators/ambgfun.h @@ -74,7 +74,7 @@ namespace matx __MATX_INLINE__ AmbgFunOp(const OpX &x, const OpY &y, double fs, AMBGFunCutType_t cut, float cut_val) : x_(x), y_(y), fs_(fs), cut_(cut), cut_val_(cut_val) { - + 
MATX_LOG_TRACE("{} constructor: fs={}, cut={}", str(), fs, static_cast(cut)); static_assert(OpX::Rank() == 1, "Input to ambgfun must be rank 1"); if (cut == AMBGFUN_CUT_TYPE_2D) { out_dims_[0] = 2 * x_.Size(0) - 1; diff --git a/include/matx/operators/any.h b/include/matx/operators/any.h index 9cac0f81..3183b08a 100644 --- a/include/matx/operators/any.h +++ b/include/matx/operators/any.h @@ -60,6 +60,7 @@ namespace detail { __MATX_INLINE__ std::string str() const { return "any(" + get_type_str(a_) + ")"; } __MATX_INLINE__ AnyOp(const OpA &a) : a_(a) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); for (int r = 0; r < ORank; r++) { out_dims_[r] = a_.Size(r); } diff --git a/include/matx/operators/apply.h b/include/matx/operators/apply.h index 2579f8f0..05e8555c 100644 --- a/include/matx/operators/apply.h +++ b/include/matx/operators/apply.h @@ -59,6 +59,7 @@ namespace matx __MATX_INLINE__ ApplyOp(Func func, const Ops&... ops) : func_(func), ops_(detail::base_type_t(ops)...) { + MATX_LOG_TRACE("{} constructor: num_ops={}", str(), sizeof...(Ops)); static_assert(sizeof...(Ops) > 0, "ApplyOp requires at least one input operator"); // Initialize sizes from the first operator diff --git a/include/matx/operators/apply_idx.h b/include/matx/operators/apply_idx.h index 13adc09c..9a8d09c3 100644 --- a/include/matx/operators/apply_idx.h +++ b/include/matx/operators/apply_idx.h @@ -63,6 +63,7 @@ namespace matx __MATX_INLINE__ ApplyIdxOp(Func func, const Ops&... ops) : func_(func), ops_(detail::base_type_t(ops)...) 
{ + MATX_LOG_TRACE("{} constructor: num_ops={}", str(), sizeof...(Ops)); static_assert(sizeof...(Ops) > 0, "ApplyIdxOp requires at least one input operator"); // Initialize sizes from the first operator diff --git a/include/matx/operators/argmax.h b/include/matx/operators/argmax.h index 82f0d201..57ff01c6 100644 --- a/include/matx/operators/argmax.h +++ b/include/matx/operators/argmax.h @@ -57,7 +57,7 @@ namespace detail { __MATX_INLINE__ std::string str() const { return "argmax(" + get_type_str(a_) + ")"; } __MATX_INLINE__ ArgMaxOp(const OpA &a) : a_(a) { - + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); }; template diff --git a/include/matx/operators/argmin.h b/include/matx/operators/argmin.h index b8a20370..5bdce769 100644 --- a/include/matx/operators/argmin.h +++ b/include/matx/operators/argmin.h @@ -56,7 +56,7 @@ namespace detail { __MATX_INLINE__ std::string str() const { return "argmin(" + get_type_str(a_) + ")"; } __MATX_INLINE__ ArgMinOp(const OpA &a) : a_(a) { - + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); }; template diff --git a/include/matx/operators/argminmax.h b/include/matx/operators/argminmax.h index 4f395cb9..8a978d06 100644 --- a/include/matx/operators/argminmax.h +++ b/include/matx/operators/argminmax.h @@ -57,7 +57,7 @@ namespace detail { __MATX_INLINE__ std::string str() const { return "argminmax(" + get_type_str(a_) + ")"; } __MATX_INLINE__ ArgMinMaxOp(const OpA &a) : a_(a) { - + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); }; template diff --git a/include/matx/operators/argsort.h b/include/matx/operators/argsort.h index a733a2da..98d3020b 100644 --- a/include/matx/operators/argsort.h +++ b/include/matx/operators/argsort.h @@ -61,6 +61,7 @@ namespace detail { __MATX_INLINE__ std::string str() const { return "argsort()"; } __MATX_INLINE__ ArgsortOp(const OpA &a, const SortDirection_t dir) : a_(a), dir_(dir) { + MATX_LOG_TRACE("{} constructor: rank={}, dir={}", str(), Rank(), static_cast(dir)); for (int r 
= 0; r < Rank(); r++) { out_dims_[r] = a_.Size(r); } diff --git a/include/matx/operators/at.h b/include/matx/operators/at.h index 4ee00479..2522414a 100644 --- a/include/matx/operators/at.h +++ b/include/matx/operators/at.h @@ -53,7 +53,9 @@ namespace matx using value_type = typename Op::value_type; __MATX_INLINE__ std::string str() const { return "at()"; } - __MATX_INLINE__ AtOp(const Op &op, Is... is) : op_(op), idx_{is...} {}; + __MATX_INLINE__ AtOp(const Op &op, Is... is) : op_(op), idx_{is...} { + MATX_LOG_TRACE("{} constructor: num_indices={}", str(), sizeof...(Is)); + }; template __MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) operator()([[maybe_unused]] Is2... indices) const diff --git a/include/matx/operators/base_operator.h b/include/matx/operators/base_operator.h index 393c3e76..5a4e30e6 100644 --- a/include/matx/operators/base_operator.h +++ b/include/matx/operators/base_operator.h @@ -38,6 +38,7 @@ #include "matx/core/operator_utils.h" #include "matx/core/capabilities.h" #include "matx/core/error.h" +#include "matx/core/log.h" namespace matx { diff --git a/include/matx/operators/binary_operators.h b/include/matx/operators/binary_operators.h index 1aec8d31..21343d93 100644 --- a/include/matx/operators/binary_operators.h +++ b/include/matx/operators/binary_operators.h @@ -123,6 +123,7 @@ namespace matx __MATX_INLINE__ matxBinaryOp(const I1 &in1, const I2 &in2, const Op &op) : in1_(in1), in2_(in2), op_(op) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); if constexpr (Rank() > 0) { MATX_ASSERT_COMPATIBLE_OP_SIZES(in1_); diff --git a/include/matx/operators/cart2sph.h b/include/matx/operators/cart2sph.h index ebbf45c3..470832b9 100644 --- a/include/matx/operators/cart2sph.h +++ b/include/matx/operators/cart2sph.h @@ -59,6 +59,7 @@ namespace matx __MATX_INLINE__ Cart2SphOp(const T1 &x, const T2 &y, const T3 &z) : x_(x), y_(y), z_(z) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); MATX_ASSERT_COMPATIBLE_OP_SIZES(x); 
MATX_ASSERT_COMPATIBLE_OP_SIZES(y); MATX_ASSERT_COMPATIBLE_OP_SIZES(z); diff --git a/include/matx/operators/cast.h b/include/matx/operators/cast.h index 430f5ae4..aba8047c 100644 --- a/include/matx/operators/cast.h +++ b/include/matx/operators/cast.h @@ -71,7 +71,9 @@ namespace matx using value_type = NewType; __MATX_INLINE__ std::string str() const { return as_type_str() + "(" + op_.str() + ")"; } - __MATX_INLINE__ CastOp(const T &op) : op_(op){}; + __MATX_INLINE__ CastOp(const T &op) : op_(op){ + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); + }; template __MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) operator()(Is... indices) const @@ -137,6 +139,7 @@ namespace matx __MATX_INLINE__ std::string str() const { return as_type_str() + "(" + real_op_.str() + "," + imag_op_.str() + ")"; } __MATX_INLINE__ ComplexCastOp(T1 real_op, T2 imag_op) : real_op_(real_op), imag_op_(imag_op) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); static_assert(detail::get_rank() == detail::get_rank(), "rank of real and imaginary operators must match"); if (real_op_.Shape() != imag_op_.Shape()) { MATX_THROW(matxInvalidSize, "ComplexCastOp: sizes of input operators must match in all dimensions"); diff --git a/include/matx/operators/cgsolve.h b/include/matx/operators/cgsolve.h index 91b109bc..a42c6153 100644 --- a/include/matx/operators/cgsolve.h +++ b/include/matx/operators/cgsolve.h @@ -65,7 +65,7 @@ namespace matx __MATX_INLINE__ CGSolveOp(const OpA &A, const OpB &B, double tol, int max_iters) : a_(A), b_(B), tol_(tol), max_iters_(max_iters) { - + MATX_LOG_TRACE("{} constructor: tol={}, max_iters={}", str(), tol, max_iters); for (int r = 0; r < Rank(); r++) { out_dims_[r] = b_.Size(r); } diff --git a/include/matx/operators/channelize_poly.h b/include/matx/operators/channelize_poly.h index 40e71ebd..b6628836 100644 --- a/include/matx/operators/channelize_poly.h +++ b/include/matx/operators/channelize_poly.h @@ -68,7 +68,8 @@ namespace detail { 
__MATX_INLINE__ std::string str() const { return "channelize_poly(" + get_type_str(a_) + "," + get_type_str(f_) + ")";} __MATX_INLINE__ ChannelizePolyOp(const OpA &a, const FilterType &f, index_t num_channels, index_t decimation_factor) : a_(a), f_(f), num_channels_(num_channels), decimation_factor_(decimation_factor) - { + { + MATX_LOG_TRACE("{} constructor: num_channels={}, decimation_factor={}", str(), num_channels, decimation_factor); const index_t b_len = (a_.Size(OpA::Rank() - 1) + num_channels - 1) / num_channels; for (int r = 0; r < OpA::Rank()-1; r++) { diff --git a/include/matx/operators/chol.h b/include/matx/operators/chol.h index 8c57f8c9..cb7655ed 100644 --- a/include/matx/operators/chol.h +++ b/include/matx/operators/chol.h @@ -61,7 +61,9 @@ namespace detail { using can_alias = bool; // Chol is allowed to use the same input/output memory __MATX_INLINE__ std::string str() const { return "chol()"; } - __MATX_INLINE__ CholOp(const OpA &a, SolverFillMode uplo) : a_(a), uplo_(uplo) { } + __MATX_INLINE__ CholOp(const OpA &a, SolverFillMode uplo) : a_(a), uplo_(uplo) { + MATX_LOG_TRACE("{} constructor: uplo={}", str(), static_cast(uplo)); + } // This should never be called template diff --git a/include/matx/operators/clone.h b/include/matx/operators/clone.h index ed29e950..33940579 100644 --- a/include/matx/operators/clone.h +++ b/include/matx/operators/clone.h @@ -83,7 +83,7 @@ MATX_IGNORE_WARNING_POP_GCC } } MATX_ASSERT(d == T::Rank(), matxInvalidDim); - + MATX_LOG_TRACE("{} constructor: input_rank={}, output_rank={}", str(), T::Rank(), CRank); } template diff --git a/include/matx/operators/collapse.h b/include/matx/operators/collapse.h index c3afc062..8e3d153d 100644 --- a/include/matx/operators/collapse.h +++ b/include/matx/operators/collapse.h @@ -56,6 +56,7 @@ namespace matx { static_assert(DIM <= T1::Rank(), "Collapse DIM must be less than or equal to Rank() of operator"); static_assert(DIM > 1, "Must collapse multiple dims"); + MATX_LOG_TRACE("{} 
constructor: input_rank={}, output_rank={}", str(), T1::Rank(), T1::Rank() - DIM + 1); static_assert(T1::Rank() >= 2, "Collapse must be called on operators with rank >= 2"); // compute size of collapsed dimension diff --git a/include/matx/operators/comma.h b/include/matx/operators/comma.h index 7823cee3..005c5b1e 100644 --- a/include/matx/operators/comma.h +++ b/include/matx/operators/comma.h @@ -49,6 +49,7 @@ namespace matx class CommaOp : public BaseOp>{ public: __MATX_HOST__ __MATX_INLINE__ CommaOp(const Op1 &op1, const Op2 &op2) : op1_(op1), op2_(op2) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); MATX_STATIC_ASSERT_STR(Op1::Rank() == Op2::Rank(), matxInvalidSize, "Chained expressions using the comma operator must match in rank"); if constexpr ( Rank() > 0) { diff --git a/include/matx/operators/concat.h b/include/matx/operators/concat.h index f4a35a2e..3f7d3c79 100644 --- a/include/matx/operators/concat.h +++ b/include/matx/operators/concat.h @@ -79,6 +79,7 @@ namespace matx static_assert(RANK > 0, "Cannot concatenate rank-0 tensors"); static_assert(sizeof...(Ts) > 1, "Must have more than one tensor to concatenate"); static_assert((... 
&& (RANK == Ts::Rank())), "concatenated ops must have the same rank"); + MATX_LOG_TRACE("{} constructor: rank={}, axis={}, num_tensors={}", str(), RANK, axis, sizeof...(Ts)); for (int32_t i = 0; i < RANK; i++) { if(i == axis_) { diff --git a/include/matx/operators/conv.h b/include/matx/operators/conv.h index f40bc64e..de96de60 100644 --- a/include/matx/operators/conv.h +++ b/include/matx/operators/conv.h @@ -75,7 +75,7 @@ namespace matx __MATX_INLINE__ Conv1DOp(const OpA &A, const OpB &B, matxConvCorrMode_t mode, matxConvCorrMethod_t method, PermDims perm) : a_(A), b_(B), mode_(mode), method_(method), perm_(perm) { - + MATX_LOG_TRACE("{} constructor: mode={}, method={}", str(), static_cast(mode), static_cast(method)); MATX_ASSERT_STR((!is_matx_type_v && !is_matx_type_v) || method == MATX_C_METHOD_DIRECT, matxInvalidType, "FFT convolutions do not support half precision float currently"); @@ -304,7 +304,7 @@ namespace detail { __MATX_INLINE__ Conv2DOp(const OpA &A, const OpB &B, matxConvCorrMode_t mode, PermDims perm) : a_(A), b_(B), mode_(mode), perm_(perm) { - + MATX_LOG_TRACE("{} constructor: mode={}", str(), static_cast(mode)); // Currently when using the axis parameter the rank of inputs must be equal if constexpr (!std::is_same_v) { for (int r = 0; r < Rank(); r++) { diff --git a/include/matx/operators/corr.h b/include/matx/operators/corr.h index 29e9a73e..7d72e63a 100644 --- a/include/matx/operators/corr.h +++ b/include/matx/operators/corr.h @@ -71,7 +71,7 @@ namespace matx __MATX_INLINE__ CorrOp(const OpA &A, const OpB &B, matxConvCorrMode_t mode, [[maybe_unused]] matxConvCorrMethod_t method, PermDims perm) : a_(A), b_(B), mode_(mode), method_(method), perm_(perm) { - + MATX_LOG_TRACE("{} constructor: mode={}, method={}", str(), static_cast(mode), static_cast(method)); // Currently when using the axis parameter the rank of inputs must be equal if constexpr (!std::is_same_v) { for (int r = 0; r < Rank(); r++) { diff --git a/include/matx/operators/cov.h 
b/include/matx/operators/cov.h index bb3f1441..4f16c6ea 100644 --- a/include/matx/operators/cov.h +++ b/include/matx/operators/cov.h @@ -62,7 +62,7 @@ namespace matx __MATX_INLINE__ CovOp(const OpA &A) : a_(A) { - + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); for (int r = 0; r < Rank(); r++) { out_dims_[r] = a_.Size(r); } diff --git a/include/matx/operators/cross.h b/include/matx/operators/cross.h index 65033ce5..a4f5c5f0 100644 --- a/include/matx/operators/cross.h +++ b/include/matx/operators/cross.h @@ -66,6 +66,7 @@ namespace matx __MATX_INLINE__ std::string str() const { return "cross()"; } __MATX_INLINE__ CrossOp(const OpA &A, const OpB &B) : a_(A), b_(B) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); MATX_STATIC_ASSERT_STR(OpA::Rank() >= 1 && OpB::Rank() >= 1, matxInvalidDim, "Operators to cross() must have rank GTE one."); //dims other than the last are batched, so count R-->L, beginning one-left of the right-most dim diff --git a/include/matx/operators/cumsum.h b/include/matx/operators/cumsum.h index 1e3d70dc..0d82d6a4 100644 --- a/include/matx/operators/cumsum.h +++ b/include/matx/operators/cumsum.h @@ -64,6 +64,7 @@ namespace detail { __MATX_INLINE__ std::string str() const { return "cumsum()"; } __MATX_INLINE__ CumSumOp(const OpA &a) : a_(a) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); for (int r = 0; r < Rank(); r++) { out_dims_[r] = a_.Size(r); } diff --git a/include/matx/operators/dct.h b/include/matx/operators/dct.h index 8a3ea10e..87d63c54 100644 --- a/include/matx/operators/dct.h +++ b/include/matx/operators/dct.h @@ -54,7 +54,9 @@ template class dctOp : public BaseOp> { index_t N_; public: - dctOp(Out out, I in, index_t N) : out_(out), in_(in), N_(N) {} + dctOp(Out out, I in, index_t N) : out_(out), in_(in), N_(N) { + MATX_LOG_TRACE("dctOp constructor: N={}", N); + } template __MATX_DEVICE__ inline void operator()(index_t idx) diff --git a/include/matx/operators/dense2sparse.h 
b/include/matx/operators/dense2sparse.h index 8224dd09..ca0ae78e 100644 --- a/include/matx/operators/dense2sparse.h +++ b/include/matx/operators/dense2sparse.h @@ -51,7 +51,9 @@ class Dense2SparseOp : public BaseOp> { using tosparse_xform_op = bool; using value_type = typename OpA::value_type; - __MATX_INLINE__ Dense2SparseOp(const OpA &a) : a_(a) {} + __MATX_INLINE__ Dense2SparseOp(const OpA &a) : a_(a) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), OpA::Rank()); + } __MATX_INLINE__ std::string str() const { return "dense2sparse(" + get_type_str(a_) + ")"; diff --git a/include/matx/operators/det.h b/include/matx/operators/det.h index 53f4e6ba..72724414 100644 --- a/include/matx/operators/det.h +++ b/include/matx/operators/det.h @@ -55,7 +55,9 @@ namespace detail { using det_xform_op = bool; __MATX_INLINE__ std::string str() const { return "det()"; } - __MATX_INLINE__ DetOp(const OpA &a) : a_(a) { } + __MATX_INLINE__ DetOp(const OpA &a) : a_(a) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); + } // This should never be called template diff --git a/include/matx/operators/diag.h b/include/matx/operators/diag.h index dbff9964..2bbc966a 100644 --- a/include/matx/operators/diag.h +++ b/include/matx/operators/diag.h @@ -61,7 +61,9 @@ namespace matx __MATX_INLINE__ std::string str() const { return "diag(" + op_.str() + ")"; } - __MATX_INLINE__ DiagOp(const T1 &op, index_t k) : op_(op), k_(k) { } + __MATX_INLINE__ DiagOp(const T1 &op, index_t k) : op_(op), k_(k) { + MATX_LOG_TRACE("{} constructor: k={}", str(), k); + } template __MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) operator()(Is... 
indices) const diff --git a/include/matx/operators/eig.h b/include/matx/operators/eig.h index 09e6ebb8..47ac1e1e 100644 --- a/include/matx/operators/eig.h +++ b/include/matx/operators/eig.h @@ -60,7 +60,9 @@ namespace detail { using eig_xform_op = bool; __MATX_INLINE__ std::string str() const { return "eig()"; } - __MATX_INLINE__ EigOp(const OpA &a, EigenMode jobz, SolverFillMode uplo) : a_(a), jobz_(jobz), uplo_(uplo) { }; + __MATX_INLINE__ EigOp(const OpA &a, EigenMode jobz, SolverFillMode uplo) : a_(a), jobz_(jobz), uplo_(uplo) { + MATX_LOG_TRACE("{} constructor: jobz={}, uplo={}", str(), static_cast(jobz), static_cast(uplo)); + }; // This should never be called template diff --git a/include/matx/operators/einsum.h b/include/matx/operators/einsum.h index 0e476107..ad66de75 100644 --- a/include/matx/operators/einsum.h +++ b/include/matx/operators/einsum.h @@ -58,7 +58,9 @@ namespace detail { using einsum_xform_op = bool; __MATX_INLINE__ std::string str() const { return "einsum()"; } - __MATX_INLINE__ EinsumOp(const std::string &subscripts, const OpA&... ops) : subscripts_(subscripts), a_(ops...) { }; + __MATX_INLINE__ EinsumOp(const std::string &subscripts, const OpA&... ops) : subscripts_(subscripts), a_(ops...) 
{ + MATX_LOG_TRACE("{} constructor: subscripts=\"{}\"", str(), subscripts); + }; // This should never be called template diff --git a/include/matx/operators/fft.h b/include/matx/operators/fft.h index 82e43ab6..06c4c7e4 100644 --- a/include/matx/operators/fft.h +++ b/include/matx/operators/fft.h @@ -105,6 +105,7 @@ namespace matx __MATX_INLINE__ FFTOp(const OpA &a, index_t size, PermDims perm, FFTNorm norm) : a_(a), fft_size_(size), perm_(perm), norm_(norm) { + MATX_LOG_TRACE("{} constructor: fft_size={}, norm={}", str(), size, static_cast(norm)); for (int r = 0; r < Rank(); r++) { out_dims_[r] = a_.Size(r); } @@ -697,6 +698,7 @@ namespace matx } __MATX_INLINE__ FFT2Op(const OpA &a, PermDims perm, FFTNorm norm) : a_(a), perm_(perm), norm_(norm) { + MATX_LOG_TRACE("{} constructor: norm={}", str(), static_cast(norm)); for (int r = 0; r < Rank(); r++) { out_dims_[r] = a_.Size(r); } diff --git a/include/matx/operators/fftshift.h b/include/matx/operators/fftshift.h index 031aae07..adf62a38 100644 --- a/include/matx/operators/fftshift.h +++ b/include/matx/operators/fftshift.h @@ -52,6 +52,7 @@ namespace matx __MATX_INLINE__ std::string str() const { return "fftshift(" + op_.str() + ")"; } __MATX_INLINE__ FFTShift1DOp(const T1 &op) : op_(op){ + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); static_assert(Rank() >= 1, "1D FFT shift must have a rank 1 operator or higher"); }; @@ -169,6 +170,7 @@ namespace matx __MATX_INLINE__ std::string str() const { return "fftshift(" + op_.str() + ")"; } __MATX_INLINE__ FFTShift2DOp(const T1 &op) : op_(op){ + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); static_assert(Rank() >= 2, "2D FFT shift must have a rank 2 operator or higher"); }; diff --git a/include/matx/operators/filter.h b/include/matx/operators/filter.h index c282dfa2..2af830a3 100644 --- a/include/matx/operators/filter.h +++ b/include/matx/operators/filter.h @@ -65,6 +65,7 @@ namespace detail { } __MATX_INLINE__ FilterOp(const OpA &a, const 
cuda::std::array h_rec, const cuda::std::array h_nonrec) : a_(a), h_rec_(h_rec), h_nonrec_(h_nonrec) { + MATX_LOG_TRACE("{} constructor: rank={}, NR={}, NNR={}", str(), Rank(), NR, NNR); for (int r = 0; r < Rank(); r++) { out_dims_[r] = a_.Size(r); } diff --git a/include/matx/operators/find.h b/include/matx/operators/find.h index 09be65d6..a51b171c 100644 --- a/include/matx/operators/find.h +++ b/include/matx/operators/find.h @@ -56,7 +56,9 @@ namespace detail { using find_xform_op = bool; __MATX_INLINE__ std::string str() const { return "find()"; } - __MATX_INLINE__ FindOp(const OpA &a, SelectType sel) : a_(a), sel_(sel) { }; + __MATX_INLINE__ FindOp(const OpA &a, SelectType sel) : a_(a), sel_(sel) { + MATX_LOG_TRACE("{} constructor", str()); + }; // This should never be called template diff --git a/include/matx/operators/find_idx.h b/include/matx/operators/find_idx.h index fedde45a..9827b1d7 100644 --- a/include/matx/operators/find_idx.h +++ b/include/matx/operators/find_idx.h @@ -56,7 +56,9 @@ namespace detail { using find_idx_xform_op = bool; __MATX_INLINE__ std::string str() const { return "find_idx()"; } - __MATX_INLINE__ FindIdxOp(const OpA &a, SelectType sel) : a_(a), sel_(sel) { }; + __MATX_INLINE__ FindIdxOp(const OpA &a, SelectType sel) : a_(a), sel_(sel) { + MATX_LOG_TRACE("{} constructor", str()); + }; // This should never be called template diff --git a/include/matx/operators/find_peaks.h b/include/matx/operators/find_peaks.h index a15d75f3..5a2d3754 100644 --- a/include/matx/operators/find_peaks.h +++ b/include/matx/operators/find_peaks.h @@ -61,6 +61,7 @@ namespace detail { __MATX_INLINE__ FindPeaksOp(const OpA &a, value_type height, value_type threshold) : a_(a), height_(height), threshold_(threshold) { + MATX_LOG_TRACE("{} constructor: height={}, threshold={}", str(), height, threshold); } template diff --git a/include/matx/operators/flatten.h b/include/matx/operators/flatten.h index e4b8e21a..f83f6125 100644 --- a/include/matx/operators/flatten.h 
+++ b/include/matx/operators/flatten.h @@ -55,6 +55,7 @@ namespace matx __MATX_INLINE__ FlattenOp(const T1 &op1) : op1_(op1) { static_assert(T1::Rank() > 1, "flatten has no effect on tensors of rank 0 and 1"); + MATX_LOG_TRACE("{} constructor: input_rank={}, output_rank=1", str(), T1::Rank()); } template diff --git a/include/matx/operators/frexp.h b/include/matx/operators/frexp.h index f1092a4e..58ce0c3e 100644 --- a/include/matx/operators/frexp.h +++ b/include/matx/operators/frexp.h @@ -51,6 +51,7 @@ namespace detail { __MATX_INLINE__ std::string str() const { return "frexp()"; } __MATX_INLINE__ FrexpOp(const OpA &a) : a_(a) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); static_assert(std::is_floating_point_v || is_cuda_complex_v, "frexp() must take a floating point input"); diff --git a/include/matx/operators/hermitian.h b/include/matx/operators/hermitian.h index 4873e2c7..89db0f41 100644 --- a/include/matx/operators/hermitian.h +++ b/include/matx/operators/hermitian.h @@ -57,6 +57,7 @@ namespace matx __MATX_INLINE__ std::string str() const { return "hermitian(" + op_.str() + ")"; } __MATX_INLINE__ HermitianTransOp(const T1 &op) : op_(op) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); static_assert(Rank() >= 2, "Hermitian operation needs input with rank >= 2"); } diff --git a/include/matx/operators/hist.h b/include/matx/operators/hist.h index a9a6a888..5dbc22a9 100644 --- a/include/matx/operators/hist.h +++ b/include/matx/operators/hist.h @@ -64,6 +64,7 @@ namespace detail { __MATX_INLINE__ std::string str() const { return "hist()"; } __MATX_INLINE__ HistOp(const OpA &a, typename OpA::value_type lower, typename OpA::value_type upper, int num_levels) : a_(a), lower_(lower), upper_(upper), num_levels_(num_levels) { + MATX_LOG_TRACE("{} constructor: num_levels={}", str(), num_levels); for (int r = 0; r < Rank(); r++) { out_dims_[r] = a_.Size(r); } diff --git a/include/matx/operators/if.h b/include/matx/operators/if.h index 
44554ff3..df86ab00 100644 --- a/include/matx/operators/if.h +++ b/include/matx/operators/if.h @@ -70,6 +70,7 @@ namespace matx */ __MATX_INLINE__ IFOP(const T1 &cond, const T2 &op) : cond_(cond), op_(op) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); static_assert((!is_tensor_view_v), "Only operator emmitters are allowed in IF. Tensor views are " "not allowed"); diff --git a/include/matx/operators/ifelse.h b/include/matx/operators/ifelse.h index e6ce941d..8a46f48e 100644 --- a/include/matx/operators/ifelse.h +++ b/include/matx/operators/ifelse.h @@ -77,6 +77,7 @@ namespace matx __MATX_INLINE__ IFELSEOp(const C1 &cond, const T1 &op1, const T2 &op2) : cond_(cond), op1_(op1), op2_(op2) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); static_assert((!is_tensor_view_v && !is_tensor_view_v), "Only operator emmitters are allowed in IFELSE. Tensor views " "are not allowed"); diff --git a/include/matx/operators/index.h b/include/matx/operators/index.h index a6b07a22..68004af8 100644 --- a/include/matx/operators/index.h +++ b/include/matx/operators/index.h @@ -53,7 +53,9 @@ namespace matx using value_type = index_t; __MATX_INLINE__ std::string str() const { return "index()"; } - __MATX_INLINE__ IndexOp(int dim) : dim_(dim){}; + __MATX_INLINE__ IndexOp(int dim) : dim_(dim){ + MATX_LOG_TRACE("{} constructor: dim={}", str(), dim); + }; template __MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ auto operator()(Is... 
indices) const diff --git a/include/matx/operators/interleaved.h b/include/matx/operators/interleaved.h index 94ad969a..2546b6f3 100644 --- a/include/matx/operators/interleaved.h +++ b/include/matx/operators/interleaved.h @@ -55,6 +55,7 @@ namespace matx __MATX_INLINE__ std::string str() const { return "interleaved(" + op_.str() + ")"; } __MATX_INLINE__ ComplexInterleavedOp(const T1 &op) : op_(op) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); static_assert(!is_complex_v>, "Complex interleaved op only works on scalar input types"); static_assert(Rank() > 0); }; diff --git a/include/matx/operators/interp.h b/include/matx/operators/interp.h index df4b8102..0ca38f05 100644 --- a/include/matx/operators/interp.h +++ b/include/matx/operators/interp.h @@ -397,6 +397,7 @@ namespace matx { xq_(xq), method_(method) { + MATX_LOG_TRACE("{} constructor: method={}", str(), static_cast(method)); if (x_.Size(x_.Rank() - 1) != v_.Size(v_.Rank() - 1)) { MATX_THROW(matxInvalidSize, "interp1: sample points and values must have the same size in the last dimension"); } diff --git a/include/matx/operators/inverse.h b/include/matx/operators/inverse.h index 813af389..d09f1ad0 100644 --- a/include/matx/operators/inverse.h +++ b/include/matx/operators/inverse.h @@ -57,7 +57,9 @@ namespace detail { using inv_xform_op = bool; __MATX_INLINE__ std::string str() const { return "inv()"; } - __MATX_INLINE__ InvOp(const OpA &a) : a_(a) {}; + __MATX_INLINE__ InvOp(const OpA &a) : a_(a) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); + }; template diff --git a/include/matx/operators/isclose.h b/include/matx/operators/isclose.h index 60e79fb4..66923962 100644 --- a/include/matx/operators/isclose.h +++ b/include/matx/operators/isclose.h @@ -55,6 +55,7 @@ namespace matx __MATX_INLINE__ IsCloseOp(const Op1 &op1, const Op2 &op2, double rtol, double atol) : op1_(op1), op2_(op2), rtol_(static_cast(rtol)), atol_(static_cast(atol)) { + MATX_LOG_TRACE("{} constructor: rtol={}, 
atol={}", str(), rtol, atol); static_assert(Op1::Rank() == Op2::Rank(), "Operator ranks must match in isclose()"); MATX_ASSERT_COMPATIBLE_OP_SIZES(op1); MATX_ASSERT_COMPATIBLE_OP_SIZES(op2); diff --git a/include/matx/operators/kronecker.h b/include/matx/operators/kronecker.h index 7389c6a2..ad17e926 100644 --- a/include/matx/operators/kronecker.h +++ b/include/matx/operators/kronecker.h @@ -61,6 +61,7 @@ namespace matx __MATX_INLINE__ KronOp(const T1 &op1, const T2 &op2) : op1_(op1), op2_(op2) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); static_assert(RankGTE(Rank(), 2), "Kronecker product must be used on tensors with rank 2 or higher"); } diff --git a/include/matx/operators/legendre.h b/include/matx/operators/legendre.h index 2f8bb611..d07371f2 100644 --- a/include/matx/operators/legendre.h +++ b/include/matx/operators/legendre.h @@ -95,6 +95,7 @@ namespace matx __MATX_INLINE__ std::string str() const { return "legendre(" + get_type_str(n_) + "," + get_type_str(m_) + "," + get_type_str(in_) + ")"; } __MATX_INLINE__ LegendreOp(const T1 &n, const T2 &m, const T3 &in, cuda::std::array axis) : n_(n), m_(m), in_(in), axis_(axis) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); static_assert(get_rank() <= 1, "legendre op: n must be a scalar, rank 0 or 1 operator"); static_assert(get_rank() <= 1, "legendre op: m must be a scalar, rank 0 or 1 operator"); } diff --git a/include/matx/operators/lu.h b/include/matx/operators/lu.h index 5a5fee11..54ce65fc 100644 --- a/include/matx/operators/lu.h +++ b/include/matx/operators/lu.h @@ -55,7 +55,9 @@ namespace detail { using lu_xform_op = bool; __MATX_INLINE__ std::string str() const { return "lu()"; } - __MATX_INLINE__ LUOp(const OpA &a) : a_(a) { }; + __MATX_INLINE__ LUOp(const OpA &a) : a_(a) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); + }; // This should never be called template diff --git a/include/matx/operators/matmul.h b/include/matx/operators/matmul.h index 
e72ec621..a341840e 100644 --- a/include/matx/operators/matmul.h +++ b/include/matx/operators/matmul.h @@ -73,6 +73,7 @@ namespace matx __MATX_INLINE__ MatMulOp(const OpA &a, const OpB &b, float alpha, float beta, PermDims perm) : a_(a), b_(b), alpha_(alpha), beta_(beta), perm_(perm) { + MATX_LOG_TRACE("{} constructor: alpha={}, beta={}", str(), alpha, beta); if constexpr (!std::is_same_v) { for (int r = 0; r < Rank(); r++) { if (r == Rank() - 2) { diff --git a/include/matx/operators/matvec.h b/include/matx/operators/matvec.h index 288ee8c1..692e0e23 100644 --- a/include/matx/operators/matvec.h +++ b/include/matx/operators/matvec.h @@ -67,7 +67,7 @@ namespace matx __MATX_INLINE__ MatVecOp(const OpA &A, const OpB &B, float alpha, float beta) : a_(A), b_(B), alpha_(alpha), beta_(beta) { - + MATX_LOG_TRACE("{} constructor: alpha={}, beta={}", str(), alpha, beta); for (int r = 0; r < Rank(); r++) { out_dims_[r] = a_.Size(r); } diff --git a/include/matx/operators/max.h b/include/matx/operators/max.h index 3a8fcdb4..415c0145 100644 --- a/include/matx/operators/max.h +++ b/include/matx/operators/max.h @@ -60,6 +60,7 @@ namespace detail { __MATX_INLINE__ std::string str() const { return "max(" + get_type_str(a_) + ")"; } __MATX_INLINE__ MaxOp(const OpA &a) : a_(a) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); for (int r = 0; r < ORank; r++) { out_dims_[r] = a_.Size(r); } diff --git a/include/matx/operators/mean.h b/include/matx/operators/mean.h index eb1ae6ff..b928d423 100644 --- a/include/matx/operators/mean.h +++ b/include/matx/operators/mean.h @@ -60,6 +60,7 @@ namespace detail { __MATX_INLINE__ std::string str() const { return "mean(" + get_type_str(a_) + ")"; } __MATX_INLINE__ MeanOp(const OpA &a) : a_(a) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); for (int r = 0; r < ORank; r++) { out_dims_[r] = a_.Size(r); } diff --git a/include/matx/operators/median.h b/include/matx/operators/median.h index e48709b6..5e7cc24d 100644 --- 
a/include/matx/operators/median.h +++ b/include/matx/operators/median.h @@ -60,6 +60,7 @@ namespace detail { __MATX_INLINE__ std::string str() const { return "median(" + get_type_str(a_) + ")"; } __MATX_INLINE__ MedianOp(const OpA &a) : a_(a) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); for (int r = 0; r < ORank; r++) { out_dims_[r] = a_.Size(r); } diff --git a/include/matx/operators/min.h b/include/matx/operators/min.h index ae6b4855..b4fe1fe1 100644 --- a/include/matx/operators/min.h +++ b/include/matx/operators/min.h @@ -60,6 +60,7 @@ namespace detail { __MATX_INLINE__ std::string str() const { return "min(" + get_type_str(a_) + ")"; } __MATX_INLINE__ MinOp(const OpA &a) : a_(a) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); for (int r = 0; r < ORank; r++) { out_dims_[r] = a_.Size(r); } diff --git a/include/matx/operators/norm.h b/include/matx/operators/norm.h index c85e0b00..351e2d80 100644 --- a/include/matx/operators/norm.h +++ b/include/matx/operators/norm.h @@ -70,6 +70,7 @@ namespace matx } __MATX_INLINE__ NormOp(const OpA &op, NormOrder order) : a_(op), order_(order) { + MATX_LOG_TRACE("{} constructor: order={}", str(), static_cast(order)); if constexpr (std::is_same_v) { MATX_ASSERT_STR(order == NormOrder::NONE || order == NormOrder::L1 || order == NormOrder::L2, matxInvalidParameter, "Invalid norm order used for vector mode"); diff --git a/include/matx/operators/normalize.h b/include/matx/operators/normalize.h index d4de7055..4db08cf7 100644 --- a/include/matx/operators/normalize.h +++ b/include/matx/operators/normalize.h @@ -72,10 +72,12 @@ namespace matx using self_type = NormalizeOp; __MATX_INLINE__ NormalizeOp(const OpA &op, const NORMALIZE_RANGE method): op_(op), normalize_method(method) { + MATX_LOG_TRACE("{} constructor: method={}", str(), static_cast(method)); InitNormalize(); } __MATX_INLINE__ NormalizeOp(const OpA &op, const NORMALIZE_RANGE method, const float p): op_(op), normalize_method(method), p_(p){ + 
MATX_LOG_TRACE("{} constructor: method={}, p={}", str(), static_cast(method), p); MATX_ASSERT_STR(normalize_method == NORMALIZE_RANGE::NORM, matxInvalidParameter, "p value can be specified for only p-norm"); InitNormalize(); } diff --git a/include/matx/operators/outer.h b/include/matx/operators/outer.h index 958adc4e..e8b4a141 100644 --- a/include/matx/operators/outer.h +++ b/include/matx/operators/outer.h @@ -67,7 +67,7 @@ namespace matx __MATX_INLINE__ OuterOp(const OpA &A, const OpB &B, float alpha, float beta) : a_(A), b_(B), alpha_(alpha), beta_(beta) { - + MATX_LOG_TRACE("{} constructor: alpha={}, beta={}", str(), alpha, beta); out_dims_[RANK - 1] = b_.Size(OpB::Rank() - 1); out_dims_[RANK - 2] = a_.Size(OpA::Rank() - 1); if constexpr (remove_cvref_t::Rank() >= remove_cvref_t::Rank()) { diff --git a/include/matx/operators/overlap.h b/include/matx/operators/overlap.h index 0ca7df56..5dc40510 100644 --- a/include/matx/operators/overlap.h +++ b/include/matx/operators/overlap.h @@ -65,7 +65,7 @@ namespace matx __MATX_INLINE__ std::string str() const { return "overlap(" + op_.str() + ")"; } __MATX_INLINE__ OverlapOp(const T &op, const cuda::std::array &windows, const cuda::std::array &strides) : op_(op) { - + MATX_LOG_TRACE("{} constructor: dim={}, rank={}", str(), DIM, Rank()); // This only works for 1D tensors going to 2D at the moment. 
Generalize to // higher dims later index_t window_size = windows[0]; diff --git a/include/matx/operators/pad.h b/include/matx/operators/pad.h index df768775..caf9ebca 100644 --- a/include/matx/operators/pad.h +++ b/include/matx/operators/pad.h @@ -81,6 +81,7 @@ namespace matx __MATX_INLINE__ PadOp(const T& op, int axis, const PadSizeType& pad_sizes, const value_type& pad_value, PadMode mode = MATX_PAD_MODE_CONSTANT) : op_(op), axis_(axis), pad_value_(pad_value), mode_(mode) { + MATX_LOG_TRACE("{} constructor: axis={}, mode={}", str(), axis, static_cast(mode)); static_assert(RANK > 0, "Cannot pad rank-0 tensors"); MATX_ASSERT_STR(axis >= 0 && axis < RANK, matxInvalidDim, "pad axis must be >= 0 and less than the rank of the operator"); MATX_ASSERT_STR(pad_sizes.size() == 2, matxInvalidParameter, "pad_sizes must contain exactly 2 elements [before, after]"); diff --git a/include/matx/operators/percentile.h b/include/matx/operators/percentile.h index c8b3ecc5..353b9108 100644 --- a/include/matx/operators/percentile.h +++ b/include/matx/operators/percentile.h @@ -61,6 +61,7 @@ namespace detail { __MATX_INLINE__ std::string str() const { return "percentile(" + get_type_str(a_) + ")"; } __MATX_INLINE__ PercentileOp(const OpA &a, unsigned char q, PercentileMethod method) : a_(a), q_(q), method_(method) { + MATX_LOG_TRACE("{} constructor: q={}, method={}", str(), static_cast(q), static_cast(method)); for (int r = 0; r < ORank; r++) { out_dims_[r] = (r == ORank - 1) ? 
1 : a_.Size(r); } diff --git a/include/matx/operators/permute.h b/include/matx/operators/permute.h index f7c151cc..3e408c47 100644 --- a/include/matx/operators/permute.h +++ b/include/matx/operators/permute.h @@ -74,6 +74,7 @@ namespace matx dims_[i] = dims[i]; } + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); } template diff --git a/include/matx/operators/pinv.h b/include/matx/operators/pinv.h index a109a30e..dde0cf12 100644 --- a/include/matx/operators/pinv.h +++ b/include/matx/operators/pinv.h @@ -59,6 +59,7 @@ namespace detail { __MATX_INLINE__ std::string str() const { return "pinv()"; } __MATX_INLINE__ PinvOp(const OpA &a, float rcond) : a_(a), rcond_(rcond) { + MATX_LOG_TRACE("{} constructor: rcond={}", str(), rcond); for (int r = 0; r < Rank(); r++) { if (r >= Rank() - 2) { out_dims_[r] = (r == Rank() - 1) ? a_.Size(Rank() - 2) : a_.Size(Rank() - 1); diff --git a/include/matx/operators/planar.h b/include/matx/operators/planar.h index 78998a87..7dccefbf 100644 --- a/include/matx/operators/planar.h +++ b/include/matx/operators/planar.h @@ -54,6 +54,7 @@ namespace matx __MATX_INLINE__ ComplexPlanarOp(const T1 &op) : op_(op) { static_assert(is_complex_v>, "Complex planar op only works on complex types"); static_assert(Rank() > 0); + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); }; template diff --git a/include/matx/operators/polyval.h b/include/matx/operators/polyval.h index b028f248..34760ce5 100644 --- a/include/matx/operators/polyval.h +++ b/include/matx/operators/polyval.h @@ -56,6 +56,7 @@ namespace matx __MATX_INLINE__ std::string str() const { return "polyval()"; } __MATX_INLINE__ PolyvalOp(const Op &op, const Coeffs &coeffs) : op_(op), coeffs_(coeffs) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); MATX_STATIC_ASSERT_STR(Coeffs::Rank() == 1, matxInvalidDim, "Coefficient must be rank 1"); MATX_STATIC_ASSERT_STR(Op::Rank() == 1, matxInvalidDim, "Input operator must be rank 1"); }; diff --git 
a/include/matx/operators/prod.h b/include/matx/operators/prod.h index 7a9f41c7..6ac580de 100644 --- a/include/matx/operators/prod.h +++ b/include/matx/operators/prod.h @@ -60,6 +60,7 @@ namespace detail { __MATX_INLINE__ std::string str() const { return "prod(" + get_type_str(a_) + ")"; } __MATX_INLINE__ ProdOp(const OpA &a) : a_(a) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); for (int r = 0; r < ORank; r++) { out_dims_[r] = a_.Size(r); } diff --git a/include/matx/operators/pwelch.h b/include/matx/operators/pwelch.h index 44bbea81..5ec291bd 100644 --- a/include/matx/operators/pwelch.h +++ b/include/matx/operators/pwelch.h @@ -72,6 +72,7 @@ namespace matx output_scale_mode_(output_scale_mode), fs_(fs) { + MATX_LOG_TRACE("{} constructor: nperseg={}, noverlap={}, nfft={}, fs={}", str(), nperseg, noverlap, nfft, fs); MATX_STATIC_ASSERT_STR(OpX::Rank() == 1, matxInvalidDim, "pwelch: Only input rank of 1 is supported presently"); for (int r = 0; r < OpX::Rank(); r++) { out_dims_[r] = nfft_; diff --git a/include/matx/operators/qr.h b/include/matx/operators/qr.h index 69e1b61f..ad3d4490 100644 --- a/include/matx/operators/qr.h +++ b/include/matx/operators/qr.h @@ -56,7 +56,9 @@ namespace detail { using qr_xform_op = bool; __MATX_INLINE__ std::string str() const { return "qr(" + get_type_str(a_) + ")"; } - __MATX_INLINE__ QROp(const OpA &a) : a_(a) { }; + __MATX_INLINE__ QROp(const OpA &a) : a_(a) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); + }; // This should never be called template @@ -131,7 +133,9 @@ namespace detail { using qr_solver_xform_op = bool; __MATX_INLINE__ std::string str() const { return "qr_solver()"; } - __MATX_INLINE__ SolverQROp(const OpA &a) : a_(a) { } + __MATX_INLINE__ SolverQROp(const OpA &a) : a_(a) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); + } // This should never be called template diff --git a/include/matx/operators/r2c.h b/include/matx/operators/r2c.h index 5d4cbc69..8cd185f1 100644 --- 
a/include/matx/operators/r2c.h +++ b/include/matx/operators/r2c.h @@ -53,6 +53,7 @@ namespace matx __MATX_INLINE__ std::string str() const { return "r2c(" + op_.str() + ")"; } __MATX_INLINE__ R2COp(const T1 &op, index_t orig) : op_(op), orig_size_(orig) { + MATX_LOG_TRACE("{} constructor: rank={}, orig_size={}", str(), Rank(), orig); static_assert(Rank() >= 1, "R2COp must have a rank 1 operator or higher"); }; diff --git a/include/matx/operators/reduce.h b/include/matx/operators/reduce.h index 0b5e32db..1fd835f3 100644 --- a/include/matx/operators/reduce.h +++ b/include/matx/operators/reduce.h @@ -66,6 +66,7 @@ namespace matx __MATX_INLINE__ ReduceOp(const OpA &A, PermDims perm, ReductionOp rop, bool init) : a_(A), perm_(perm), reduction_op_(rop), init_(init) { + MATX_LOG_TRACE("{} constructor: rop={}, init={}", str(), static_cast(rop), init); for (int r = 0; r < ORank; r++) { out_dims_[r] = a_.Size(r); } diff --git a/include/matx/operators/remap.h b/include/matx/operators/remap.h index 814fadbf..ea7f4dfb 100644 --- a/include/matx/operators/remap.h +++ b/include/matx/operators/remap.h @@ -63,7 +63,9 @@ namespace matx __MATX_INLINE__ std::string str() const { return "remap(" + op_.str() + ")"; } - __MATX_INLINE__ RemapOp(const T &op, IdxType idx) : op_(op), idx_(idx) {}; + __MATX_INLINE__ RemapOp(const T &op, IdxType idx) : op_(op), idx_(idx) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); + }; // Only supports one element per thread template diff --git a/include/matx/operators/repmat.h b/include/matx/operators/repmat.h index af6d1598..0ea2db44 100644 --- a/include/matx/operators/repmat.h +++ b/include/matx/operators/repmat.h @@ -67,6 +67,7 @@ namespace matx { reps_[dim] = reps; } + MATX_LOG_TRACE("{} constructor: rank={}, reps={}", str(), DIM, reps); } __MATX_INLINE__ RepMatOp(const T1 &op, const cuda::std::array reps) : op_(op) @@ -75,6 +76,7 @@ namespace matx { reps_[dim] = reps[dim]; } + MATX_LOG_TRACE("{} constructor: rank={}", str(), DIM); } 
__MATX_INLINE__ RepMatOp(const T1 &op, const index_t *reps) : op_(op) diff --git a/include/matx/operators/resample_poly.h b/include/matx/operators/resample_poly.h index 12e5aa56..752a57d6 100644 --- a/include/matx/operators/resample_poly.h +++ b/include/matx/operators/resample_poly.h @@ -66,7 +66,8 @@ namespace detail { __MATX_INLINE__ std::string str() const { return "resample_poly(" + get_type_str(a_) + "," + get_type_str(f_) + ")";} __MATX_INLINE__ ResamplePolyOp(const OpA &a, const FilterType &f, index_t up, index_t down) : a_(a), f_(f), up_(up), down_(down) - { + { + MATX_LOG_TRACE("{} constructor: up={}, down={}", str(), up, down); const index_t up_len = a_.Size(OpA::Rank() - 1) * up_; const index_t b_len = up_len / down_ + ((up_len % down_) ? 1 : 0); diff --git a/include/matx/operators/reshape.h b/include/matx/operators/reshape.h index f470bf2a..073ae45a 100644 --- a/include/matx/operators/reshape.h +++ b/include/matx/operators/reshape.h @@ -77,6 +77,7 @@ namespace matx } MATX_ASSERT_STR(size == TotalSize(op_), matxInvalidSize, "ReshapeOp: TotalSize of reshape must match"); + MATX_LOG_TRACE("{} constructor: rank={}, total_size={}", str(), Rank(), size); }; template diff --git a/include/matx/operators/reverse.h b/include/matx/operators/reverse.h index 6d27a15e..1fc30e96 100644 --- a/include/matx/operators/reverse.h +++ b/include/matx/operators/reverse.h @@ -61,7 +61,9 @@ namespace matx __MATX_INLINE__ std::string str() const { return "reverse(" + op_.str() + ")"; } - __MATX_INLINE__ ReverseOp(const T1 &op) : op_(op){}; + __MATX_INLINE__ ReverseOp(const T1 &op) : op_(op){ + MATX_LOG_TRACE("{} constructor: rank={}", str(), DIM); + }; template static __MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) get_impl(Op&& op, Is... 
indices) diff --git a/include/matx/operators/select.h b/include/matx/operators/select.h index 9e7f834a..a26e883c 100644 --- a/include/matx/operators/select.h +++ b/include/matx/operators/select.h @@ -57,7 +57,9 @@ namespace matx __MATX_INLINE__ std::string str() const { return "select(" + op_.str() + ")"; } - __MATX_INLINE__ SelectOp(const T &op, IdxType idx) : op_(op), idx_(idx) {}; + __MATX_INLINE__ SelectOp(const T &op, IdxType idx) : op_(op), idx_(idx) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); + }; template static __MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) get_impl(Op&& op, const Idx &idx, index_t i) diff --git a/include/matx/operators/self.h b/include/matx/operators/self.h index 3bf6ed28..6d2cbddf 100644 --- a/include/matx/operators/self.h +++ b/include/matx/operators/self.h @@ -57,7 +57,9 @@ namespace matx __MATX_INLINE__ std::string str() const { return "self(" + op_.str() + ")"; } - __MATX_INLINE__ SelfOp(const T1 &op) : op_(op) {} + __MATX_INLINE__ SelfOp(const T1 &op) : op_(op) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); + } template __MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) operator()(Is... 
indices) const diff --git a/include/matx/operators/set.h b/include/matx/operators/set.h index 084917ad..9d096ed4 100644 --- a/include/matx/operators/set.h +++ b/include/matx/operators/set.h @@ -98,6 +98,7 @@ class set : public BaseOp> { */ inline set(T &out, const Op &op) : out_(out), op_(op) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), T::Rank()); static_assert(is_matx_op_lvalue() == true, "Invalid operator on LHS of set/operator="); static_assert(!is_matx_transform_op(), "Cannot use transform operator on LHS of assignment"); diff --git a/include/matx/operators/shift.h b/include/matx/operators/shift.h index b19ccf44..5132d643 100644 --- a/include/matx/operators/shift.h +++ b/include/matx/operators/shift.h @@ -63,6 +63,7 @@ namespace matx __MATX_INLINE__ ShiftOp(const T1 &op, T2 shift) : op_(op), shift_(shift) { + MATX_LOG_TRACE("{} constructor: dim={}, rank={}", str(), DIM, Rank()); static_assert(DIM < Rank(), "Dimension to shift must be less than rank of tensor"); MATX_LOOP_UNROLL diff --git a/include/matx/operators/sign.h b/include/matx/operators/sign.h index 6372d13b..7371811b 100644 --- a/include/matx/operators/sign.h +++ b/include/matx/operators/sign.h @@ -56,7 +56,9 @@ namespace matx value_type zval_; __MATX_INLINE__ std::string str() const { return "sign(" + get_type_str(op_) + ")"; } - __MATX_INLINE__ SignOp(const T &op, value_type zval) : op_(op), zval_(zval) {}; + __MATX_INLINE__ SignOp(const T &op, value_type zval) : op_(op), zval_(zval) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); + }; template __MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ auto operator()(Is... 
indices) const diff --git a/include/matx/operators/slice.h b/include/matx/operators/slice.h index 6a62ac56..598f0d9d 100644 --- a/include/matx/operators/slice.h +++ b/include/matx/operators/slice.h @@ -108,6 +108,7 @@ namespace matx } } MATX_ASSERT_STR(d==Rank(), matxInvalidDim, "SliceOp: Number of dimensions without matxDropDim must equal new rank."); + MATX_LOG_TRACE("{} constructor: input_rank={}, output_rank={}", str(), T::Rank(), DIM); }; template diff --git a/include/matx/operators/softmax.h b/include/matx/operators/softmax.h index eacff1b1..a239b786 100644 --- a/include/matx/operators/softmax.h +++ b/include/matx/operators/softmax.h @@ -63,6 +63,7 @@ namespace matx __MATX_INLINE__ SoftmaxOp(const OpA &A, PermDims perm) : a_(A), perm_(perm) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); for (int r = 0; r < OpA::Rank(); r++) { out_dims_[r] = a_.Size(r); } diff --git a/include/matx/operators/solve.h b/include/matx/operators/solve.h index 0bc0abe1..0466c7cd 100644 --- a/include/matx/operators/solve.h +++ b/include/matx/operators/solve.h @@ -62,6 +62,7 @@ class SolveOp : public BaseOp> { using value_type = typename OpA::value_type; __MATX_INLINE__ SolveOp(const OpA &a, const OpB &b) : a_(a), b_(b) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); for (int r = 0, rank = Rank(); r < rank; r++) { out_dims_[r] = b_.Size(r); } diff --git a/include/matx/operators/sort.h b/include/matx/operators/sort.h index c76f2cd6..e80545f0 100644 --- a/include/matx/operators/sort.h +++ b/include/matx/operators/sort.h @@ -66,6 +66,7 @@ namespace detail { __MATX_INLINE__ std::string str() const { return "sort()"; } __MATX_INLINE__ SortOp(const OpA &a, SortDirection_t dir) : a_(a), dir_(dir) { + MATX_LOG_TRACE("{} constructor: rank={}, dir={}", str(), Rank(), static_cast(dir)); for (int r = 0; r < Rank(); r++) { out_dims_[r] = a_.Size(r); } diff --git a/include/matx/operators/sparse2dense.h b/include/matx/operators/sparse2dense.h index 6f3e3dc3..c4715d45 
100644 --- a/include/matx/operators/sparse2dense.h +++ b/include/matx/operators/sparse2dense.h @@ -58,6 +58,7 @@ class Sparse2DenseOp : public BaseOp> { using value_type = typename OpA::value_type; __MATX_INLINE__ Sparse2DenseOp(const OpA &a) : a_(a) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); for (int r = 0; r < Rank(); r++) { out_dims_[r] = a_.Size(r); } diff --git a/include/matx/operators/sparse2sparse.h b/include/matx/operators/sparse2sparse.h index 1f489556..a05286fb 100644 --- a/include/matx/operators/sparse2sparse.h +++ b/include/matx/operators/sparse2sparse.h @@ -51,7 +51,9 @@ class Sparse2SparseOp : public BaseOp> { using tosparse_xform_op = bool; using value_type = typename OpA::value_type; - __MATX_INLINE__ Sparse2SparseOp(const OpA &a) : a_(a) {} + __MATX_INLINE__ Sparse2SparseOp(const OpA &a) : a_(a) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), OpA::Rank()); + } __MATX_INLINE__ std::string str() const { return "sparse2sparse(" + get_type_str(a_) + ")"; diff --git a/include/matx/operators/sph2cart.h b/include/matx/operators/sph2cart.h index 2b320df0..3222d8c8 100644 --- a/include/matx/operators/sph2cart.h +++ b/include/matx/operators/sph2cart.h @@ -59,6 +59,7 @@ namespace matx __MATX_INLINE__ Sph2CartOp(const T1 &theta, const T2 &phi, const T3 &r) : theta_(theta), phi_(phi), r_(r) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); MATX_ASSERT_COMPATIBLE_OP_SIZES(theta); MATX_ASSERT_COMPATIBLE_OP_SIZES(phi); MATX_ASSERT_COMPATIBLE_OP_SIZES(r); diff --git a/include/matx/operators/stack.h b/include/matx/operators/stack.h index 05d23a35..90b392fe 100644 --- a/include/matx/operators/stack.h +++ b/include/matx/operators/stack.h @@ -75,6 +75,7 @@ namespace matx __MATX_INLINE__ StackOp(int axis, const Ts&... ts) : ops_(ts...), axis_(axis) { + MATX_LOG_TRACE("{} constructor: axis={}, num_tensors={}", str(), axis, sizeof...(Ts)); static_assert(sizeof...(Ts) > 1, "Must have more than one tensor to stack"); static_assert((... 
&& (RANK == Ts::Rank())), "stacked ops must have the same rank"); diff --git a/include/matx/operators/stdd.h b/include/matx/operators/stdd.h index 5cf7887a..6d93aef6 100644 --- a/include/matx/operators/stdd.h +++ b/include/matx/operators/stdd.h @@ -61,6 +61,7 @@ namespace detail { __MATX_INLINE__ std::string str() const { return "stdd(" + get_type_str(a_) + ")"; } __MATX_INLINE__ StddOp(const OpA &a, int ddof) : a_(a), ddof_(ddof) { + MATX_LOG_TRACE("{} constructor: rank={}, ddof={}", str(), Rank(), ddof); for (int r = 0; r < ORank; r++) { out_dims_[r] = a_.Size(r); } diff --git a/include/matx/operators/sum.h b/include/matx/operators/sum.h index 79673e07..6b0f7448 100644 --- a/include/matx/operators/sum.h +++ b/include/matx/operators/sum.h @@ -63,6 +63,7 @@ namespace detail { __MATX_INLINE__ std::string str() const { return "sum(" + get_type_str(a_) + ")"; } __MATX_INLINE__ SumOp(const OpA &a) : a_(a) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); for (int r = 0; r < ORank; r++) { out_dims_[r] = a_.Size(r); } diff --git a/include/matx/operators/svd.h b/include/matx/operators/svd.h index 6d23201c..3edc0b46 100644 --- a/include/matx/operators/svd.h +++ b/include/matx/operators/svd.h @@ -58,7 +58,9 @@ namespace detail { using svd_xform_op = bool; __MATX_INLINE__ std::string str() const { return "svd(" + get_type_str(a_) + ")"; } - __MATX_INLINE__ SVDOp(const OpA &a, const SVDMode jobz, const SVDHostAlgo algo) : a_(a), jobz_(jobz), algo_(algo) { }; + __MATX_INLINE__ SVDOp(const OpA &a, const SVDMode jobz, const SVDHostAlgo algo) : a_(a), jobz_(jobz), algo_(algo) { + MATX_LOG_TRACE("{} constructor: jobz={}, algo={}", str(), static_cast(jobz), static_cast(algo)); + }; // This should never be called template @@ -167,8 +169,9 @@ namespace detail { using svd_xform_op = bool; __MATX_INLINE__ std::string str() const { return "svdpi(" + get_type_str(a_) + ")"; } - __MATX_INLINE__ SVDPIOp(const OpA &a, const OpX &x, int iterations, index_t k) : a_(a), x_(x), 
iterations_(iterations), k_(k) - { } + __MATX_INLINE__ SVDPIOp(const OpA &a, const OpX &x, int iterations, index_t k) : a_(a), x_(x), iterations_(iterations), k_(k) + { + MATX_LOG_TRACE("{} constructor: iterations={}, k={}", str(), iterations, k); } // This should never be called template diff --git a/include/matx/operators/toeplitz.h b/include/matx/operators/toeplitz.h index 319ca789..daf49e15 100644 --- a/include/matx/operators/toeplitz.h +++ b/include/matx/operators/toeplitz.h @@ -76,6 +76,7 @@ namespace matx __MATX_INLINE__ ToeplitzOp(const T1 &op1, const T2 &op2) : op1_(op1), op2_(op2) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); if constexpr (is_matx_op()) { static_assert(T1::Rank() == 1, "toeplitz() operator input rank must be 1"); } diff --git a/include/matx/operators/trace.h b/include/matx/operators/trace.h index 89571ada..417038ff 100644 --- a/include/matx/operators/trace.h +++ b/include/matx/operators/trace.h @@ -58,7 +58,9 @@ namespace detail { using trace_xform_op = bool; __MATX_INLINE__ std::string str() const { return "trace()"; } - __MATX_INLINE__ TraceOp(const OpA &a) : a_(a) {} + __MATX_INLINE__ TraceOp(const OpA &a) : a_(a) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); + } __MATX_HOST__ __MATX_INLINE__ auto Data() const noexcept { return ptr; } diff --git a/include/matx/operators/transpose.h b/include/matx/operators/transpose.h index 1f09ccbb..d73b100f 100644 --- a/include/matx/operators/transpose.h +++ b/include/matx/operators/transpose.h @@ -68,7 +68,8 @@ namespace detail { else { out_dims_[r] = a_.Size(r); } - } + } + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); } template diff --git a/include/matx/operators/unary_operators.h b/include/matx/operators/unary_operators.h index 29a43b04..100a8196 100644 --- a/include/matx/operators/unary_operators.h +++ b/include/matx/operators/unary_operators.h @@ -73,6 +73,7 @@ namespace matx } __MATX_INLINE__ matxUnaryOp(const I1 &in1, const Op &op) : in1_(in1), 
op_(op) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); if constexpr (Rank() > 0) { for (int32_t i = 0; i < Rank(); i++) { size_[i] = get_size(in1_, i); diff --git a/include/matx/operators/unique.h b/include/matx/operators/unique.h index 6c88f0fc..9296190d 100644 --- a/include/matx/operators/unique.h +++ b/include/matx/operators/unique.h @@ -55,7 +55,9 @@ namespace detail { using unique_xform_op = bool; __MATX_INLINE__ std::string str() const { return "unique()"; } - __MATX_INLINE__ UniqueOp(const OpA &a) : a_(a) { }; + __MATX_INLINE__ UniqueOp(const OpA &a) : a_(a) { + MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); + }; // This should never be called template diff --git a/include/matx/operators/updownsample.h b/include/matx/operators/updownsample.h index 3d078c19..71a2428d 100644 --- a/include/matx/operators/updownsample.h +++ b/include/matx/operators/updownsample.h @@ -66,6 +66,7 @@ namespace matx __MATX_INLINE__ std::string str() const { return "upsample(" + op_.str() + ")"; } __MATX_INLINE__ UpsampleOp(const T &op, int32_t dim, index_t n) : op_(op), dim_(dim), n_(n) { + MATX_LOG_TRACE("{} constructor: dim={}, n={}, rank={}", str(), dim, n, Rank()); }; template diff --git a/include/matx/operators/var.h b/include/matx/operators/var.h index fee055f1..d4810b66 100644 --- a/include/matx/operators/var.h +++ b/include/matx/operators/var.h @@ -61,6 +61,7 @@ namespace detail { __MATX_INLINE__ std::string str() const { return "var(" + get_type_str(a_) + ")"; } __MATX_INLINE__ VarOp(const OpA &a, int ddof) : a_(a), ddof_(ddof) { + MATX_LOG_TRACE("{} constructor: rank={}, ddof={}", str(), Rank(), ddof); for (int r = 0; r < ORank; r++) { out_dims_[r] = a_.Size(r); } diff --git a/include/matx/operators/zipvec.h b/include/matx/operators/zipvec.h index 53cea3e4..9f31ac6c 100644 --- a/include/matx/operators/zipvec.h +++ b/include/matx/operators/zipvec.h @@ -74,6 +74,7 @@ namespace matx __MATX_INLINE__ ZipVecOp(const Ts&... ts) : ops_(ts...) 
{ + MATX_LOG_TRACE("{} constructor: num_ops={}, rank={}", str(), sizeof...(Ts), Rank()); static_assert(sizeof...(Ts) > 0 && sizeof...(Ts) <= 4, "Must have between 1 and 4 operators for zipvec"); static_assert((... && (RANK == Ts::Rank())), "zipped ops must have the same rank"); // All ops must have the same scalar value type; that is enforced by AggregateToVecType diff --git a/include/matx/transforms/chol/chol_cuda.h b/include/matx/transforms/chol/chol_cuda.h index c610c717..3bf28ae0 100644 --- a/include/matx/transforms/chol/chol_cuda.h +++ b/include/matx/transforms/chol/chol_cuda.h @@ -303,8 +303,10 @@ void chol_impl(OutputTensor &&out, const ATensor &a, auto params = detail::matxDnCholCUDAPlan_t::GetCholParams(tmp_out, uplo_cusolver, exec); using cache_val_type = detail::matxDnCholCUDAPlan_t; + auto cache_id = detail::GetCacheIdFromType(); + MATX_LOG_DEBUG("Cholesky transform: cache_id={}", cache_id); detail::GetCache().LookupAndExec( - detail::GetCacheIdFromType(), + cache_id, params, [&]() { return std::make_shared(tmp_out, exec, uplo_cusolver); diff --git a/include/matx/transforms/convert/dense2sparse_cusparse.h b/include/matx/transforms/convert/dense2sparse_cusparse.h index 230536df..fc658d75 100644 --- a/include/matx/transforms/convert/dense2sparse_cusparse.h +++ b/include/matx/transforms/convert/dense2sparse_cusparse.h @@ -310,8 +310,10 @@ void dense2sparse_impl(OutputTensorType &o, const InputTensorType &A, // Lookup and cache. 
using cache_val_type = detail::Dense2SparseHandle_t; + auto cache_id = detail::GetCacheIdFromType(); + MATX_LOG_DEBUG("Dense2Sparse transform: cache_id={}", cache_id); detail::GetCache().LookupAndExec( - detail::GetCacheIdFromType(), params, + cache_id, params, [&]() { return std::make_shared(o, a, stream); }, [&](std::shared_ptr cache_type) { cache_type->Exec(o, a); diff --git a/include/matx/transforms/convert/sparse2dense_cusparse.h b/include/matx/transforms/convert/sparse2dense_cusparse.h index 183935ca..30a13679 100644 --- a/include/matx/transforms/convert/sparse2dense_cusparse.h +++ b/include/matx/transforms/convert/sparse2dense_cusparse.h @@ -257,8 +257,10 @@ void sparse2dense_impl(OutputTensorType &O, const InputTensorType &a, // Lookup and cache. using cache_val_type = detail::Sparse2DenseHandle_t; + auto cache_id = detail::GetCacheIdFromType(); + MATX_LOG_DEBUG("Sparse2Dense transform: cache_id={}", cache_id); detail::GetCache().LookupAndExec( - detail::GetCacheIdFromType(), params, + cache_id, params, [&]() { return std::make_shared(o, a, stream); }, [&](std::shared_ptr cache_type) { cache_type->Exec(o, a); diff --git a/include/matx/transforms/convert/sparse2sparse_cusparse.h b/include/matx/transforms/convert/sparse2sparse_cusparse.h index 1fffeebc..8266f916 100644 --- a/include/matx/transforms/convert/sparse2sparse_cusparse.h +++ b/include/matx/transforms/convert/sparse2sparse_cusparse.h @@ -226,8 +226,10 @@ void sparse2sparse_impl(OutputTensorType &o, const InputTensorType &a, // Lookup and cache. 
using cache_val_type = detail::Sparse2SparseHandle_t; + auto cache_id = detail::GetCacheIdFromType(); + MATX_LOG_DEBUG("Sparse2Sparse transform: cache_id={}", cache_id); detail::GetCache().LookupAndExec( - detail::GetCacheIdFromType(), params, + cache_id, params, [&]() { return std::make_shared(o, a, stream); }, [&](std::shared_ptr cache_type) { cache_type->Exec(o, a); diff --git a/include/matx/transforms/cov.h b/include/matx/transforms/cov.h index 1afcd51d..10161942 100644 --- a/include/matx/transforms/cov.h +++ b/include/matx/transforms/cov.h @@ -240,8 +240,10 @@ void cov_impl(TensorTypeC &c, const TensorTypeA &a, auto params = detail::matxCovHandle_t::GetCovParams(c, a, stream); using cache_val_type = detail::matxCovHandle_t; + auto cache_id = detail::GetCacheIdFromType(); + MATX_LOG_DEBUG("Covariance transform: cache_id={}", cache_id); detail::GetCache().LookupAndExec( - detail::GetCacheIdFromType(), + cache_id, params, [&]() { return std::make_shared(c, a); diff --git a/include/matx/transforms/cub.h b/include/matx/transforms/cub.h index 4ee596a8..447ec9df 100644 --- a/include/matx/transforms/cub.h +++ b/include/matx/transforms/cub.h @@ -1502,8 +1502,10 @@ void sort_impl_inner(OutputTensor &a_out, const InputOperator &a, detail::CUB_OP_RADIX_SORT>::GetCubParams(a_out, a, stream); using cache_val_type = detail::matxCubPlan_t; + auto cache_id = detail::GetCacheIdFromType(); + MATX_LOG_DEBUG("CUB radix sort transform: cache_id={}", cache_id); detail::GetCache().LookupAndExec( - detail::GetCacheIdFromType(), + cache_id, params, [&]() { return std::make_shared(a_out, a, p, stream); @@ -1701,8 +1703,10 @@ void cub_reduce(OutputTensor &a_out, const InputOperator &a, typename InputOpera detail::CUB_OP_REDUCE, param_type>::GetCubParams(a_out, a, stream); using cache_val_type = detail::matxCubPlan_t; + auto cache_id = detail::GetCacheIdFromType(); + MATX_LOG_DEBUG("CUB reduce transform: cache_id={}", cache_id); detail::GetCache().LookupAndExec( - 
detail::GetCacheIdFromType(), + cache_id, params, [&]() { return std::make_shared(a_out, a, reduce_params, stream); @@ -1751,8 +1755,10 @@ void cub_sum(OutputTensor &a_out, const InputOperator &a, detail::CUB_OP_REDUCE_SUM>::GetCubParams(a_out, a, stream); using cache_val_type = detail::matxCubPlan_t; + auto cache_id = detail::GetCacheIdFromType(); + MATX_LOG_DEBUG("CUB reduce sum transform: cache_id={}", cache_id); detail::GetCache().LookupAndExec( - detail::GetCacheIdFromType(), + cache_id, params, [&]() { return std::make_shared(a_out, a, detail::EmptyParams_t{}, stream); @@ -1796,8 +1802,10 @@ void cub_min(OutputTensor &a_out, const InputOperator &a, detail::CUB_OP_REDUCE_MIN>::GetCubParams(a_out, a, stream); using cache_val_type = detail::matxCubPlan_t; + auto cache_id = detail::GetCacheIdFromType(); + MATX_LOG_DEBUG("CUB reduce min transform: cache_id={}", cache_id); detail::GetCache().LookupAndExec( - detail::GetCacheIdFromType(), + cache_id, params, [&]() { return std::make_shared(a_out, a, detail::EmptyParams_t{}, stream); @@ -1842,8 +1850,10 @@ void cub_max(OutputTensor &a_out, const InputOperator &a, detail::CUB_OP_REDUCE_MAX>::GetCubParams(a_out, a, stream); using cache_val_type = detail::matxCubPlan_t; + auto cache_id = detail::GetCacheIdFromType(); + MATX_LOG_DEBUG("CUB reduce max transform: cache_id={}", cache_id); detail::GetCache().LookupAndExec( - detail::GetCacheIdFromType(), + cache_id, params, [&]() { return std::make_shared(a_out, a, detail::EmptyParams_t{}, stream); @@ -1907,8 +1917,10 @@ void cub_argreduce(OutputTensor &a_out, TensorIndexType &aidx_out, const InputOp #ifndef MATX_DISABLE_CUB_CACHE auto params = cache_val_type::GetCubParams(a_out_supported, aidx_out_supported, a_supported, detail::CUB_OP_SINGLE_ARG_REDUCE, stream); + auto cache_id = detail::GetCacheIdFromType(); + MATX_LOG_DEBUG("CUB single arg reduce transform: cache_id={}", cache_id); detail::GetCache().LookupAndExec( - detail::GetCacheIdFromType(), + cache_id, params, 
[&]() { return std::make_shared(a_out_supported, aidx_out_supported, a_supported, reduce_params, stream); @@ -1976,8 +1988,10 @@ void cub_dualargreduce(OutputTensor &a1_out, #ifndef MATX_DISABLE_CUB_CACHE auto params = cache_val_type::GetCubParams(a1_out, aidx1_out, a2_out, aidx2_out, a, detail::CUB_OP_DUAL_ARG_REDUCE, stream); + auto cache_id = detail::GetCacheIdFromType(); + MATX_LOG_DEBUG("CUB dual arg reduce transform: cache_id={}", cache_id); detail::GetCache().LookupAndExec( - detail::GetCacheIdFromType(), + cache_id, params, [&]() { return std::make_shared(a1_out, aidx1_out, a2_out, aidx2_out, a, reduce_params, stream); @@ -2256,8 +2270,10 @@ void cumsum_impl(OutputTensor &a_out, const InputOperator &a, detail::matxCubPlan_t::GetCubParams(a_out, a, stream); using cache_val_type = detail::matxCubPlan_t; + auto cache_id = detail::GetCacheIdFromType(); + MATX_LOG_DEBUG("CUB cumsum transform: cache_id={}", cache_id); detail::GetCache().LookupAndExec( - detail::GetCacheIdFromType(), + cache_id, params, [&]() { return std::make_shared(a_out, a, detail::EmptyParams_t{}, stream); @@ -2353,8 +2369,10 @@ void hist_impl(OutputTensor &a_out, const InputOperator &a, detail::CUB_OP_HIST_EVEN>::GetCubParams(a_out, a, stream); using cache_val_type = detail::matxCubPlan_t; + auto cache_id = detail::GetCacheIdFromType(); + MATX_LOG_DEBUG("CUB histogram transform: cache_id={}", cache_id); detail::GetCache().LookupAndExec( - detail::GetCacheIdFromType(), + cache_id, params, [&]() { return std::make_shared(a_out, a, hp, stream); @@ -2494,8 +2512,10 @@ void find_impl(OutputTensor &a_out, CountTensor &num_found, const InputOperator detail::CUB_OP_SELECT_VALS, param_type>::GetCubParams(a_out, a, stream); using cache_val_type = detail::matxCubPlan_t; + auto cache_id = detail::GetCacheIdFromType(); + MATX_LOG_DEBUG("CUB find values transform: cache_id={}", cache_id); detail::GetCache().LookupAndExec( - detail::GetCacheIdFromType(), + cache_id, params, [&]() { return 
std::make_shared(a_out, a, cparams, stream); @@ -2613,8 +2633,10 @@ void find_idx_impl(OutputTensor &a_out, CountTensor &num_found, const InputOpera detail::CUB_OP_SELECT_IDX, param_type>::GetCubParams(a_out, a, stream); using cache_val_type = detail::matxCubPlan_t; + auto cache_id = detail::GetCacheIdFromType(); + MATX_LOG_DEBUG("CUB find indices transform: cache_id={}", cache_id); detail::GetCache().LookupAndExec( - detail::GetCacheIdFromType(), + cache_id, params, [&]() { return std::make_shared(a_out, a, cparams, stream); @@ -2732,8 +2754,10 @@ void unique_impl(OutputTensor &a_out, CountTensor &num_found, const InputOperato detail::CUB_OP_UNIQUE, param_type>::GetCubParams(a_out, a, stream); using cache_val_type = detail::matxCubPlan_t; + auto cache_id = detail::GetCacheIdFromType(); + MATX_LOG_DEBUG("CUB unique transform: cache_id={}", cache_id); detail::GetCache().LookupAndExec( - detail::GetCacheIdFromType(), + cache_id, params, [&]() { return std::make_shared(a_out, a, cparams, stream); diff --git a/include/matx/transforms/eig/eig_cuda.h b/include/matx/transforms/eig/eig_cuda.h index 572cec94..aa19ee43 100644 --- a/include/matx/transforms/eig/eig_cuda.h +++ b/include/matx/transforms/eig/eig_cuda.h @@ -348,8 +348,10 @@ void eig_impl(OutputTensor &&out, WTensor &&w, // Get cache or new eigen plan if it doesn't exist using cache_val_type = detail::matxDnEigCUDAPlan_t; + auto cache_id = detail::GetCacheIdFromType(); + MATX_LOG_DEBUG("Eigenvalue transform: cache_id={}", cache_id); detail::GetCache().LookupAndExec( - detail::GetCacheIdFromType(), + cache_id, params, [&]() { return std::make_shared(w_new, tv, exec, jobz_cusolver, uplo_cusolver); diff --git a/include/matx/transforms/einsum.h b/include/matx/transforms/einsum.h index dc3dc10a..8ac230ea 100644 --- a/include/matx/transforms/einsum.h +++ b/include/matx/transforms/einsum.h @@ -585,8 +585,10 @@ namespace cutensor { params.stream = stream; + auto cache_id = detail::GetCacheIdFromType(); + 
MATX_LOG_DEBUG("Einsum transform: cache_id={}", cache_id); detail::GetCache().LookupAndExec( - detail::GetCacheIdFromType(), + cache_id, params, [&]() { return cuda::std::apply([&](auto&&... args) { diff --git a/include/matx/transforms/fft/fft_cuda.h b/include/matx/transforms/fft/fft_cuda.h index 218f184b..4b31dcc1 100644 --- a/include/matx/transforms/fft/fft_cuda.h +++ b/include/matx/transforms/fft/fft_cuda.h @@ -706,8 +706,10 @@ __MATX_INLINE__ void fft_impl(OutputTensor o, const InputTensor i, params.stream = stream; using cache_val_type = detail::matxCUDAFFTPlan1D_t; + auto cache_id = detail::GetCacheIdFromType(); + MATX_LOG_DEBUG("FFT1D forward transform: cache_id={}", cache_id); detail::GetCache().LookupAndExec( - detail::GetCacheIdFromType(), + cache_id, params, [&]() { return std::make_shared(out, in); @@ -752,8 +754,10 @@ __MATX_INLINE__ void ifft_impl(OutputTensor o, const InputTensor i, params.stream = stream; using cache_val_type = detail::matxCUDAFFTPlan1D_t; + auto cache_id = detail::GetCacheIdFromType(); + MATX_LOG_DEBUG("FFT1D inverse transform: cache_id={}", cache_id); detail::GetCache().LookupAndExec( - detail::GetCacheIdFromType(), + cache_id, params, [&]() { return std::make_shared(out, in); @@ -793,8 +797,10 @@ __MATX_INLINE__ void fft2_impl(OutputTensor o, const InputTensor i, FFTNorm norm params.stream = stream; using cache_val_type = detail::matxCUDAFFTPlan2D_t; + auto cache_id = detail::GetCacheIdFromType(); + MATX_LOG_DEBUG("FFT2D forward transform: cache_id={}", cache_id); detail::GetCache().LookupAndExec( - detail::GetCacheIdFromType(), + cache_id, params, [&]() { return std::make_shared(out, in); @@ -835,8 +841,10 @@ __MATX_INLINE__ void ifft2_impl(OutputTensor o, const InputTensor i, FFTNorm nor // Get cache or new FFT plan if it doesn't exist using cache_val_type = detail::matxCUDAFFTPlan2D_t; + auto cache_id = detail::GetCacheIdFromType(); + MATX_LOG_DEBUG("FFT2D inverse transform: cache_id={}", cache_id); 
detail::GetCache().LookupAndExec( - detail::GetCacheIdFromType(), + cache_id, params, [&]() { return std::make_shared(out, in); diff --git a/include/matx/transforms/filter.h b/include/matx/transforms/filter.h index 51ecbc51..1e5c53d0 100644 --- a/include/matx/transforms/filter.h +++ b/include/matx/transforms/filter.h @@ -477,8 +477,10 @@ void filter_impl([[maybe_unused]] OutType &o, [[maybe_unused]] const InType &i, params.hash = rhash + nrhash; using cache_val_type = detail::matxFilter_t; + auto cache_id = detail::GetCacheIdFromType(); + MATX_LOG_DEBUG("Filter transform: cache_id={}, NR={}, NNR={}", cache_id, NR, NNR); detail::GetCache().LookupAndExec( - detail::GetCacheIdFromType(), + cache_id, params, [&]() { return matxMakeFilter(o, i, h_rec, h_nonrec); diff --git a/include/matx/transforms/inverse.h b/include/matx/transforms/inverse.h index 368c2fe7..e2b9f68f 100644 --- a/include/matx/transforms/inverse.h +++ b/include/matx/transforms/inverse.h @@ -593,8 +593,10 @@ void inv_impl(TensorTypeAInv &a_inv, const TensorTypeA &a, auto params = detail::matxInversePlan_t::GetInverseParams(a_inv, a, stream); using cache_val_type = detail::matxInversePlan_t; + auto cache_id = detail::GetCacheIdFromType(); + MATX_LOG_DEBUG("Inverse transform: cache_id={}", cache_id); detail::GetCache().LookupAndExec( - detail::GetCacheIdFromType(), + cache_id, params, [&]() { return std::make_shared(a_inv, a, stream); diff --git a/include/matx/transforms/lu/lu_cuda.h b/include/matx/transforms/lu/lu_cuda.h index ca3c5fa6..229e8874 100644 --- a/include/matx/transforms/lu/lu_cuda.h +++ b/include/matx/transforms/lu/lu_cuda.h @@ -292,8 +292,10 @@ void lu_impl(OutputTensor &&out, PivotTensor &&piv, // Get cache or new LU plan if it doesn't exist using cache_val_type = detail::matxDnLUCUDAPlan_t; + auto cache_id = detail::GetCacheIdFromType(); + MATX_LOG_DEBUG("LU transform: cache_id={}", cache_id); detail::GetCache().LookupAndExec( - detail::GetCacheIdFromType(), + cache_id, params, [&]() { 
return std::make_shared(piv_new, tvt, exec); diff --git a/include/matx/transforms/matmul/matmul_cuda.h b/include/matx/transforms/matmul/matmul_cuda.h index 7024510a..71060058 100644 --- a/include/matx/transforms/matmul/matmul_cuda.h +++ b/include/matx/transforms/matmul/matmul_cuda.h @@ -1230,8 +1230,10 @@ void matmul_impl(TensorTypeC C, const TensorTypeA A, params.stream = stream; using cache_val_type = detail::MatMulCUDAHandle_t; + auto cache_id = detail::GetCacheIdFromType(); + MATX_LOG_DEBUG("MatMul transform: cache_id={}", cache_id); detail::GetCache().LookupAndExec( - detail::GetCacheIdFromType(), + cache_id, params, [&]() { return std::make_shared(c, a, b); diff --git a/include/matx/transforms/matmul/matmul_cusparse.h b/include/matx/transforms/matmul/matmul_cusparse.h index 3ac0664c..83ca7824 100644 --- a/include/matx/transforms/matmul/matmul_cusparse.h +++ b/include/matx/transforms/matmul/matmul_cusparse.h @@ -320,8 +320,10 @@ void sparse_matmul_impl(TensorTypeC &C, const TensorTypeA &a, // Lookup and cache. using cache_val_type = detail::MatMulCUSPARSEHandle_t; + auto cache_id = detail::GetCacheIdFromType(); + MATX_LOG_DEBUG("MatMul CUSPARSE transform: cache_id={}", cache_id); detail::GetCache().LookupAndExec( - detail::GetCacheIdFromType(), params, + cache_id, params, [&]() { return std::make_shared(c, a, b, stream, alpha, beta); }, diff --git a/include/matx/transforms/matmul/matvec_cusparse.h b/include/matx/transforms/matmul/matvec_cusparse.h index d3f97979..32207106 100644 --- a/include/matx/transforms/matmul/matvec_cusparse.h +++ b/include/matx/transforms/matmul/matvec_cusparse.h @@ -343,8 +343,10 @@ void sparse_matvec_impl(TensorTypeC &C, const TensorTypeA &a, // Lookup and cache. 
using cache_val_type = detail::MatVecCUSPARSEHandle_t; + auto cache_id = detail::GetCacheIdFromType(); + MATX_LOG_DEBUG("MatVec CUSPARSE transform: cache_id={}", cache_id); detail::GetCache().LookupAndExec( - detail::GetCacheIdFromType(), params, + cache_id, params, [&]() { return std::make_shared(c, a, b, stream, alpha, beta); }, diff --git a/include/matx/transforms/qr/qr_cuda.h b/include/matx/transforms/qr/qr_cuda.h index 5d677db4..73bf6686 100644 --- a/include/matx/transforms/qr/qr_cuda.h +++ b/include/matx/transforms/qr/qr_cuda.h @@ -475,8 +475,10 @@ void qr_solver_impl(OutTensor &&out, TauTensor &&tau, // Get cache or new QR plan if it doesn't exist using cache_val_type = detail::matxDnQRCUDAPlan_t; + auto cache_id = detail::GetCacheIdFromType(); + MATX_LOG_DEBUG("QR transform (full): cache_id={}", cache_id); detail::GetCache().LookupAndExec( - detail::GetCacheIdFromType(), + cache_id, params, [&]() { return std::make_shared(tau_new, tvt, exec); @@ -848,8 +850,10 @@ void qr_econ_impl(OutTensor &&out, RTensor &&out_r, // Get cache or new QR plan if it doesn't exist using cache_val_type = detail::matxDnEconQRCUDAPlan_t; + auto cache_id = detail::GetCacheIdFromType(); + MATX_LOG_DEBUG("QR transform (economic): cache_id={}", cache_id); detail::GetCache().LookupAndExec( - detail::GetCacheIdFromType(), + cache_id, params, [&]() { return std::make_shared(tau_new, tvt, exec); diff --git a/include/matx/transforms/solve/solve_cudss.h b/include/matx/transforms/solve/solve_cudss.h index 2aa32437..568003e3 100644 --- a/include/matx/transforms/solve/solve_cudss.h +++ b/include/matx/transforms/solve/solve_cudss.h @@ -286,8 +286,10 @@ void sparse_solve_impl(TensorTypeC &C, const TensorTypeA &a, // Lookup and cache. 
using cache_val_type = detail::SolveCUDSSHandle_t; + auto cache_id = detail::GetCacheIdFromType(); + MATX_LOG_DEBUG("Solve CUDSS transform: cache_id={}", cache_id); detail::GetCache().LookupAndExec( - detail::GetCacheIdFromType(), params, + cache_id, params, [&]() { return std::make_shared(c, a, b, stream); }, [&](std::shared_ptr cache_type) { cache_type->Exec(c, a, b); diff --git a/include/matx/transforms/svd/svd_cuda.h b/include/matx/transforms/svd/svd_cuda.h index ac664a37..4b602c39 100644 --- a/include/matx/transforms/svd/svd_cuda.h +++ b/include/matx/transforms/svd/svd_cuda.h @@ -1001,8 +1001,10 @@ void svd_impl(UTensor &&u, STensor &&s, // Get cache or new SVD plan if it doesn't exist using cache_val_type = detail::matxDnSVDCUDAPlan_t; + auto cache_id = detail::GetCacheIdFromType(); + MATX_LOG_DEBUG("SVD transform (full): cache_id={}", cache_id); detail::GetCache().LookupAndExec( - detail::GetCacheIdFromType(), + cache_id, params, [&]() { return std::make_shared(u_in, s_new, vt_in, at_col_maj, method, exec, job_cusolver); @@ -1039,8 +1041,10 @@ void svd_impl(UTensor &&u, STensor &&s, // Get cache or new SVD plan if it doesn't exist using cache_val_type = detail::matxDnSVDCUDAPlan_t; + auto cache_id = detail::GetCacheIdFromType(); + MATX_LOG_DEBUG("SVD transform (vectors): cache_id={}", cache_id); detail::GetCache().LookupAndExec( - detail::GetCacheIdFromType(), + cache_id, params, [&]() { return std::make_shared(u_col_maj, s_new, vt_col_maj, tvt, method, exec, job_cusolver);