From c8132ce17a373aa0f5fbb9451a076ad41684d209 Mon Sep 17 00:00:00 2001
From: cliffburdick <cburdick@nvidia.com>
Date: Wed, 29 Oct 2025 20:48:43 -0700
Subject: [PATCH 1/2] Add comprehensive logging system and exception disabling
 support

Enhances MatX's observability and error handling
capabilities by adding extensive logging throughout the codebase and providing
an option to disable exceptions.

Logging enhancements:
- Added TRACE-level logging to all operator and generator constructors
  - Log operator name via str() method and relevant constructor parameters
  - Consolidated log.h include in base_operator.h to reduce duplication
- Added DEBUG-level logging for cache operations
  - Log cache hits and misses in LookupAndExec with cache ID, device, and thread
  - Log transform-specific cache attempts with descriptive names (FFT, MatMul,
    SVD, QR, LU, Eigenvalue, Inverse, CUB, Einsum, Solve, Sparse conversions,
    Filter, Covariance)
- Added DEBUG-level logging for kernel launches
  - Log kernel parameters in CUDA executor
- Added DEBUG-level logging for memory operations
  - Log all tensor allocations and deallocations with pointer and size info
  - Log all make_tensor() calls with signature, shape, pointer, and memory kind
- Converted all printf/fprintf calls in error.h to use MatX logger
  - Error messages now use MATX_LOG_ERROR/MATX_LOG_FATAL consistently
- Changed default log level from OFF to ERROR
  - Ensures error messages are visible by default
  - Users can override via MATX_LOG_LEVEL environment variable

Exception handling improvements:
- Added MATX_DISABLE_EXCEPTIONS CMake option
  - When enabled, MATX_THROW logs fatal error and calls abort() instead of throwing
  - Provides exception-free operation for environments that don't support them
  - All error handling macros automatically adapt to exception-disabled mode
- Fixed macro parameter naming to avoid preprocessor conflicts

These changes enable detailed runtime diagnostics for debugging performance
issues, cache behavior, and memory usage while keeping overhead low when
logging is disabled.
---
 CMakeLists.txt                                |   4 +
 docs_input/build.rst                          |   2 +
 examples/black_scholes.cu                     |  21 ++-
 include/matx/core/allocator.h                 |   8 ++
 include/matx/core/cache.h                     |   4 +
 include/matx/core/error.h                     |  43 ++++--
 include/matx/core/log.h                       |  79 ++++++++++-
 include/matx/core/make_tensor.h               | 134 ++++++++++++++++++
 include/matx/executors/cuda.h                 |   4 +
 include/matx/generators/alternate.h           |   7 +-
 include/matx/generators/bartlett.h            |   7 +-
 include/matx/generators/blackman.h            |   7 +-
 include/matx/generators/chirp.h               |  13 +-
 include/matx/generators/diag.h                |   2 +
 include/matx/generators/fftfreq.h             |   2 +
 include/matx/generators/flattop.h             |   7 +-
 include/matx/generators/hamming.h             |   7 +-
 include/matx/generators/hanning.h             |   7 +-
 include/matx/generators/linspace.h            |   2 +
 include/matx/generators/logspace.h            |   2 +
 include/matx/generators/meshgrid.h            |   2 +
 include/matx/generators/random.h              |   3 +
 include/matx/generators/range.h               |   5 +-
 include/matx/operators/all.h                  |   1 +
 include/matx/operators/ambgfun.h              |   2 +-
 include/matx/operators/any.h                  |   1 +
 include/matx/operators/apply.h                |   1 +
 include/matx/operators/apply_idx.h            |   1 +
 include/matx/operators/argmax.h               |   2 +-
 include/matx/operators/argmin.h               |   2 +-
 include/matx/operators/argminmax.h            |   2 +-
 include/matx/operators/argsort.h              |   1 +
 include/matx/operators/at.h                   |   4 +-
 include/matx/operators/base_operator.h        |   1 +
 include/matx/operators/binary_operators.h     |   1 +
 include/matx/operators/cart2sph.h             |   1 +
 include/matx/operators/cast.h                 |   5 +-
 include/matx/operators/cgsolve.h              |   2 +-
 include/matx/operators/channelize_poly.h      |   3 +-
 include/matx/operators/chol.h                 |   4 +-
 include/matx/operators/clone.h                |   2 +-
 include/matx/operators/collapse.h             |   1 +
 include/matx/operators/comma.h                |   1 +
 include/matx/operators/concat.h               |   1 +
 include/matx/operators/conv.h                 |   4 +-
 include/matx/operators/corr.h                 |   2 +-
 include/matx/operators/cov.h                  |   2 +-
 include/matx/operators/cross.h                |   1 +
 include/matx/operators/cumsum.h               |   1 +
 include/matx/operators/dct.h                  |   4 +-
 include/matx/operators/dense2sparse.h         |   4 +-
 include/matx/operators/det.h                  |   4 +-
 include/matx/operators/diag.h                 |   4 +-
 include/matx/operators/eig.h                  |   4 +-
 include/matx/operators/einsum.h               |   4 +-
 include/matx/operators/fft.h                  |   2 +
 include/matx/operators/fftshift.h             |   2 +
 include/matx/operators/filter.h               |   1 +
 include/matx/operators/find.h                 |   4 +-
 include/matx/operators/find_idx.h             |   4 +-
 include/matx/operators/find_peaks.h           |   1 +
 include/matx/operators/flatten.h              |   1 +
 include/matx/operators/frexp.h                |   1 +
 include/matx/operators/hermitian.h            |   1 +
 include/matx/operators/hist.h                 |   1 +
 include/matx/operators/if.h                   |   1 +
 include/matx/operators/ifelse.h               |   1 +
 include/matx/operators/index.h                |   4 +-
 include/matx/operators/interleaved.h          |   1 +
 include/matx/operators/interp.h               |   1 +
 include/matx/operators/inverse.h              |   4 +-
 include/matx/operators/isclose.h              |   1 +
 include/matx/operators/kronecker.h            |   1 +
 include/matx/operators/legendre.h             |   1 +
 include/matx/operators/lu.h                   |   4 +-
 include/matx/operators/matmul.h               |   1 +
 include/matx/operators/matvec.h               |   2 +-
 include/matx/operators/max.h                  |   1 +
 include/matx/operators/mean.h                 |   1 +
 include/matx/operators/median.h               |   1 +
 include/matx/operators/min.h                  |   1 +
 include/matx/operators/norm.h                 |   1 +
 include/matx/operators/normalize.h            |   2 +
 include/matx/operators/outer.h                |   2 +-
 include/matx/operators/overlap.h              |   2 +-
 include/matx/operators/pad.h                  |   1 +
 include/matx/operators/percentile.h           |   1 +
 include/matx/operators/permute.h              |   1 +
 include/matx/operators/pinv.h                 |   1 +
 include/matx/operators/planar.h               |   1 +
 include/matx/operators/polyval.h              |   1 +
 include/matx/operators/prod.h                 |   1 +
 include/matx/operators/pwelch.h               |   1 +
 include/matx/operators/qr.h                   |   8 +-
 include/matx/operators/r2c.h                  |   1 +
 include/matx/operators/reduce.h               |   1 +
 include/matx/operators/remap.h                |   4 +-
 include/matx/operators/repmat.h               |   2 +
 include/matx/operators/resample_poly.h        |   3 +-
 include/matx/operators/reshape.h              |   1 +
 include/matx/operators/reverse.h              |   4 +-
 include/matx/operators/select.h               |   4 +-
 include/matx/operators/self.h                 |   4 +-
 include/matx/operators/set.h                  |   1 +
 include/matx/operators/shift.h                |   1 +
 include/matx/operators/sign.h                 |   4 +-
 include/matx/operators/slice.h                |   1 +
 include/matx/operators/softmax.h              |   1 +
 include/matx/operators/solve.h                |   1 +
 include/matx/operators/sort.h                 |   1 +
 include/matx/operators/sparse2dense.h         |   1 +
 include/matx/operators/sparse2sparse.h        |   4 +-
 include/matx/operators/sph2cart.h             |   1 +
 include/matx/operators/stack.h                |   1 +
 include/matx/operators/stdd.h                 |   1 +
 include/matx/operators/sum.h                  |   1 +
 include/matx/operators/svd.h                  |   9 +-
 include/matx/operators/toeplitz.h             |   1 +
 include/matx/operators/trace.h                |   4 +-
 include/matx/operators/transpose.h            |   3 +-
 include/matx/operators/unary_operators.h      |   1 +
 include/matx/operators/unique.h               |   4 +-
 include/matx/operators/updownsample.h         |   1 +
 include/matx/operators/var.h                  |   1 +
 include/matx/operators/zipvec.h               |   1 +
 include/matx/transforms/chol/chol_cuda.h      |   4 +-
 .../convert/dense2sparse_cusparse.h           |   4 +-
 .../convert/sparse2dense_cusparse.h           |   4 +-
 .../convert/sparse2sparse_cusparse.h          |   4 +-
 include/matx/transforms/cov.h                 |   4 +-
 include/matx/transforms/cub.h                 |  48 +++++--
 include/matx/transforms/eig/eig_cuda.h        |   4 +-
 include/matx/transforms/einsum.h              |   4 +-
 include/matx/transforms/fft/fft_cuda.h        |  16 ++-
 include/matx/transforms/filter.h              |   4 +-
 include/matx/transforms/inverse.h             |   4 +-
 include/matx/transforms/lu/lu_cuda.h          |   4 +-
 include/matx/transforms/matmul/matmul_cuda.h  |   4 +-
 .../matx/transforms/matmul/matmul_cusparse.h  |   4 +-
 .../matx/transforms/matmul/matvec_cusparse.h  |   4 +-
 include/matx/transforms/qr/qr_cuda.h          |   8 +-
 include/matx/transforms/solve/solve_cudss.h   |   4 +-
 include/matx/transforms/svd/svd_cuda.h        |   8 +-
 143 files changed, 606 insertions(+), 110 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9948d1737..59e0ceb1d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -81,6 +81,7 @@ option(MATX_EN_CUDA_LINEINFO "Enable line information for CUDA kernels via -line
 option(MATX_EN_EXTENDED_LAMBDA "Enable extended lambda support for device/host lambdas" ON)
 option(MATX_EN_MATHDX "Enable MathDx support for kernel fusion" OFF)
 option(MATX_EN_UNSAFE_ALIAS_DETECTION "Enable aliased memory detection" OFF)
+option(MATX_DISABLE_EXCEPTIONS "Disable C++ exceptions and log errors instead" OFF)
 
 set(MATX_EN_PYBIND11 OFF CACHE BOOL "Enable pybind11 support")
 
@@ -206,6 +207,9 @@ if (MATX_NVTX_FLAGS)
     add_definitions(-DMATX_NVTX_FLAGS)
     target_compile_definitions(matx INTERFACE MATX_NVTX_FLAGS)
 endif()
+if (MATX_DISABLE_EXCEPTIONS)
+    target_compile_definitions(matx INTERFACE MATX_DISABLE_EXCEPTIONS)
+endif()
 if (MATX_BUILD_32_BIT)
     set(MATX_NVPL_INT_TYPE "lp64")
     target_compile_definitions(matx INTERFACE MATX_INDEX_32_BIT)
diff --git a/docs_input/build.rst b/docs_input/build.rst
index 519cec8ff..607902495 100644
--- a/docs_input/build.rst
+++ b/docs_input/build.rst
@@ -201,6 +201,8 @@ By default, all of these options are OFF.
     - ``-DMATX_EN_MATHDX=ON``
   * - Enable pybind11 Support. This option is usually not explicitly set, but is enabled by other options.
     - ``-DMATX_EN_PYBIND11=ON``
+  * - Disable C++ exceptions. When set, ``MATX_THROW`` logs a fatal error and calls ``abort()`` instead of throwing.
+    - ``-DMATX_DISABLE_EXCEPTIONS=ON``
 
 
 NVTX Flags
diff --git a/examples/black_scholes.cu b/examples/black_scholes.cu
index 77e872cdc..38996b82d 100644
--- a/examples/black_scholes.cu
+++ b/examples/black_scholes.cu
@@ -150,16 +150,15 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
   index_t input_size = 100'000'000;
   constexpr uint32_t num_iterations = 100;
   float time_ms;
-
-  tensor_t<dtype, 1> K_tensor{{input_size}};
-  tensor_t<dtype, 1> S_tensor{{input_size}};
-  tensor_t<dtype, 1> V_tensor{{input_size}};
-  tensor_t<dtype, 1> r_tensor{{input_size}};
-  tensor_t<dtype, 1> T_tensor{{input_size}};
-  tensor_t<dtype, 1> output_tensor{{input_size}};
-  tensor_t<dtype, 1> output_tensor2{{input_size}};
-  tensor_t<dtype, 1> output_tensor3{{input_size}};
-  tensor_t<dtype, 1> output_tensor4{{input_size}};
+  auto K_tensor = make_tensor<dtype>({input_size});
+  auto S_tensor = make_tensor<dtype>({input_size});
+  auto V_tensor = make_tensor<dtype>({input_size});
+  auto r_tensor = make_tensor<dtype>({input_size});
+  auto T_tensor = make_tensor<dtype>({input_size});
+  auto output_tensor = make_tensor<dtype>({input_size});
+  auto output_tensor2 = make_tensor<dtype>({input_size});
+  auto output_tensor3 = make_tensor<dtype>({input_size});
+  auto output_tensor4 = make_tensor<dtype>({input_size});
 
   (K_tensor = random<float>({input_size}, UNIFORM)).run();
   (S_tensor = random<float>({input_size}, UNIFORM)).run();
@@ -171,8 +170,6 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
   cudaStreamCreate(&stream);
   cudaExecutor exec{stream};
 
-  //compute_black_scholes_matx(K_tensor, S_tensor, V_tensor, r_tensor, T_tensor, output_tensor, exec);
-
   cudaEvent_t start, stop;
   cudaEventCreate(&start);
   cudaEventCreate(&stop);
diff --git a/include/matx/core/allocator.h b/include/matx/core/allocator.h
index d374bdf8f..5e43a7954 100644
--- a/include/matx/core/allocator.h
+++ b/include/matx/core/allocator.h
@@ -43,6 +43,7 @@
 
 #include "matx/core/error.h"
 #include "matx/core/nvtx.h"
+#include "matx/core/log.h"
 #include <cuda/std/functional>
 #include <cuda/std/__algorithm/max.h>
 
@@ -122,6 +123,9 @@ struct MemTracker {
 
     size_t bytes = iter->second.size;
 
+    MATX_LOG_DEBUG("Deallocating memory: ptr={}, {} bytes, space={}, remaining={} bytes", 
+                   ptr, bytes, static_cast<int>(iter->second.kind), matxMemoryStats.currentBytesAllocated - bytes);
+
     matxMemoryStats.currentBytesAllocated -= bytes;
 
     switch (iter->second.kind) {
@@ -187,6 +191,8 @@ struct MemTracker {
       }
     }
     
+    MATX_LOG_DEBUG("Allocating memory: {} bytes, space={}, stream={}", bytes, static_cast<int>(space), reinterpret_cast<void*>(stream));
+    
     switch (space) {
     case MATX_MANAGED_MEMORY:
       err = cudaMallocManaged(ptr, bytes);
@@ -214,6 +220,8 @@ struct MemTracker {
       MATX_THROW(matxOutOfMemory, "Failed to allocate memory");
     }
 
+    MATX_LOG_DEBUG("Allocated memory: ptr={}, {} bytes, total_current={} bytes", *ptr, bytes, matxMemoryStats.currentBytesAllocated + bytes);
+
     [[maybe_unused]] std::unique_lock lck(memory_mtx);
     matxMemoryStats.currentBytesAllocated += bytes;
     matxMemoryStats.totalBytesAllocated += bytes;
diff --git a/include/matx/core/cache.h b/include/matx/core/cache.h
index 1ac94b347..0279316db 100644
--- a/include/matx/core/cache.h
+++ b/include/matx/core/cache.h
@@ -192,11 +192,15 @@ class matxCache_t {
     auto &common_params_cache = rmap[key];
     auto cache_el = common_params_cache.find(params);
     if (cache_el == common_params_cache.end()) {
+      MATX_LOG_DEBUG("Cache MISS for transform: id={}, device={}, thread={}", 
+                     id, key.device_id, reinterpret_cast<void*>(std::hash<std::thread::id>{}(key.thread_id)));
       std::any tmp = mfun();
       common_params_cache.insert({params, tmp});
       efun(std::any_cast<decltype(mfun())>(tmp));
     }
     else {
+      MATX_LOG_DEBUG("Cache HIT for transform: id={}, device={}, thread={}", 
+                     id, key.device_id, reinterpret_cast<void*>(std::hash<std::thread::id>{}(key.thread_id)));
       efun(std::any_cast<decltype(mfun())>(cache_el->second));
     }
   }
diff --git a/include/matx/core/error.h b/include/matx/core/error.h
index e35ea35f2..dd924932b 100644
--- a/include/matx/core/error.h
+++ b/include/matx/core/error.h
@@ -42,6 +42,7 @@
 #endif
 
 #include "matx/core/stacktrace.h"
+#include "matx/core/log.h"
 #endif
 
 namespace matx
@@ -150,6 +151,23 @@ namespace matx
   };
   }
 
+#ifdef MATX_DISABLE_EXCEPTIONS
+
+#define MATX_ENTER_HANDLER() {
+#define MATX_EXIT_HANDLER() }
+
+#define MATX_THROW(e, str_arg)                       \
+  do {                                               \
+    MATX_LOG_FATAL("matxException ({}: {}) - {}:{}", matxErrorString(e), str_arg, __FILE__, __LINE__); \
+    std::stringstream matx_stack_trace;              \
+    matx::detail::printStackTrace(matx_stack_trace); \
+    std::string matx_stack_str = matx_stack_trace.str(); \
+    MATX_LOG_FATAL("Stack Trace:\n{}", matx_stack_str); \
+    std::abort();                                    \
+  } while(0)
+
+#else
+
 #define MATX_ENTER_HANDLER() \
   try                        \
   {
@@ -158,8 +176,8 @@ namespace matx
   }                                                             \
   catch (matx::detail::matxException & e)                       \
   {                                                             \
-    fprintf(stderr, "%s\n", e.what());                          \
-    fprintf(stderr, "Stack Trace:\n%s", e.stack.str().c_str()); \
+    MATX_LOG_FATAL("{}", e.what());                             \
+    MATX_LOG_FATAL("Stack Trace:\n{}", e.stack.str());          \
     exit(1);                                                    \
   }
 
@@ -168,6 +186,8 @@ namespace matx
     throw matx::detail::matxException(e, str, __FILE__, __LINE__); \
   }
 
+#endif
+
 #if !defined(NDEBUG) && !defined(__CUDA_ARCH__)
   #define MATX_ASSERT(a, error) \
   {                           \
@@ -190,7 +210,7 @@ namespace matx
     auto tmp = a;                      \
     if ((tmp != expected))             \
     {                                  \
-      std::cout << #a ": " << str << "(" << tmp << " != " << expected << ")\n";\
+      MATX_LOG_ERROR("{}: {} ({} != {})", #a, str, static_cast<int>(tmp), static_cast<int>(expected)); \
       MATX_THROW(error, "");           \
     }                                  \
   }
@@ -217,7 +237,7 @@ namespace matx
     const auto e_ = (e);                                        \
     if (e_ != cudaSuccess)                                      \
     {                                                           \
-      fprintf(stderr, "%s:%d CUDA Error: %s (%d)\n", __FILE__,__LINE__, cudaGetErrorString(e_), e_); \
+      MATX_LOG_ERROR("{}:{} CUDA Error: {} ({})", __FILE__, __LINE__, cudaGetErrorString(e_), static_cast<int>(e_)); \
       MATX_THROW(matx::matxCudaError, cudaGetErrorString(e_));  \
     }                                                           \
   } while (0)
@@ -239,21 +259,22 @@ namespace matx
       compatible = (size == 0 || size == Size(i));                   \
     }                                                                \
     if (!compatible) { \
-      std::cerr << "Incompatible operator sizes: ("; \
+      std::string msg = "Incompatible operator sizes: ("; \
       for (int32_t i = 0; i < Rank(); i++) { \
-        std::cerr << Size(i); \
+        msg += std::to_string(Size(i)); \
         if (i != Rank() - 1) { \
-          std::cerr << ","; \
+          msg += ","; \
         } \
       } \
-      std::cerr << ") not compatible with ("; \
+      msg += ") not compatible with ("; \
       for (int32_t i = 0; i < Rank(); i++) { \
-        std::cerr << matx::detail::get_expanded_size<Rank()>(op, i); \
+        msg += std::to_string(matx::detail::get_expanded_size<Rank()>(op, i)); \
         if (i != Rank() - 1) { \
-          std::cerr << ","; \
+          msg += ","; \
         } \
       } \
-      std::cerr << ")" << std::endl; \
+      msg += ")"; \
+      MATX_LOG_ERROR("{}", msg); \
       MATX_THROW(matxInvalidSize, "Incompatible operator sizes"); \
     } \
   }
diff --git a/include/matx/core/log.h b/include/matx/core/log.h
index 094cffc3a..5bd938416 100644
--- a/include/matx/core/log.h
+++ b/include/matx/core/log.h
@@ -46,6 +46,83 @@
 #include <memory>
 #include <mutex>
 
+// Include MatX type traits and complex types for formatting support
+#include "matx/core/half.h"
+#include "matx/core/half_complex.h"
+#include <complex>
+#include <cuda/std/complex>
+
+// Helper for formatting complex types
+namespace matx {
+namespace detail {
+  // Generic helper to format any complex-like type with real() and imag() methods
+  template<typename ComplexType>
+  inline std::string format_complex(const ComplexType& c) {
+    return std::format("({:g}{:+g}j)", 
+                      static_cast<double>(c.real()), 
+                      static_cast<double>(c.imag()));
+  }
+}
+}
+
+// Formatter specializations for all types supported by MatX
+namespace std {
+  // Formatter for std::complex<T>
+  template<typename T>
+  struct formatter<std::complex<T>> {
+    constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
+    
+    template<typename FormatContext>
+    auto format(const std::complex<T>& c, FormatContext& ctx) const {
+      return format_to(ctx.out(), "{}", matx::detail::format_complex(c));
+    }
+  };
+  
+  // Formatter for cuda::std::complex<T>
+  template<typename T>
+  struct formatter<cuda::std::complex<T>> {
+    constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
+    
+    template<typename FormatContext>
+    auto format(const cuda::std::complex<T>& c, FormatContext& ctx) const {
+      return format_to(ctx.out(), "{}", matx::detail::format_complex(c));
+    }
+  };
+  
+  // Formatter for matxHalfComplex (fp16/bf16 complex)
+  template<typename T>
+  struct formatter<matx::matxHalfComplex<T>> {
+    constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
+    
+    template<typename FormatContext>
+    auto format(const matx::matxHalfComplex<T>& c, FormatContext& ctx) const {
+      return format_to(ctx.out(), "{}", matx::detail::format_complex(c));
+    }
+  };
+  
+  // Formatter for matxFp16 (half-precision float)
+  template<>
+  struct formatter<matx::matxFp16> {
+    constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
+    
+    template<typename FormatContext>
+    auto format(const matx::matxFp16& val, FormatContext& ctx) const {
+      return format_to(ctx.out(), "{:g}", static_cast<float>(val));
+    }
+  };
+  
+  // Formatter for matxBf16 (bfloat16)
+  template<>
+  struct formatter<matx::matxBf16> {
+    constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
+    
+    template<typename FormatContext>
+    auto format(const matx::matxBf16& val, FormatContext& ctx) const {
+      return format_to(ctx.out(), "{:g}", static_cast<float>(val));
+    }
+  };
+}
+
 namespace matx {
 namespace detail {
 
@@ -139,7 +216,7 @@ class Logger {
   std::mutex mutex_;
   bool show_function_;
   
-  Logger() : min_level_(LogLevel::OFF), output_stream_(&std::cout), show_function_(false) {
+  Logger() : min_level_(LogLevel::ERROR), output_stream_(&std::cout), show_function_(false) {
     // Read log level from environment
     const char* level_env = std::getenv("MATX_LOG_LEVEL");
     if (level_env) {
diff --git a/include/matx/core/make_tensor.h b/include/matx/core/make_tensor.h
index f226a2dfc..815eab965 100644
--- a/include/matx/core/make_tensor.h
+++ b/include/matx/core/make_tensor.h
@@ -37,6 +37,7 @@
 #include "matx/core/storage.h"
 #include "matx/core/tensor_desc.h"
 #include "matx/core/dlpack.h"
+#include "matx/core/log.h"
 namespace matx {
 
 /**
@@ -52,6 +53,15 @@ auto make_tensor( const index_t (&shape)[RANK],
                   matxMemorySpace_t space = MATX_MANAGED_MEMORY,
                   cudaStream_t stream = 0) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  std::string shape_str = "[";
+  for (int i = 0; i < RANK; i++) {
+    if (i > 0) shape_str += ",";
+    shape_str += std::to_string(shape[i]);
+  }
+  shape_str += "]";
+  MATX_LOG_DEBUG("make_tensor<T,RANK>(shape, space, stream): shape={}, space={}, stream={}", 
+                 shape_str, static_cast<int>(space), reinterpret_cast<void*>(stream));
 
   DefaultDescriptor<RANK> desc{shape};
   auto storage = make_owning_storage<T>(desc.TotalSize(), space, stream);
@@ -69,6 +79,8 @@ template <typename T, typename ShapeType,
   std::enable_if_t<!is_matx_descriptor_v<ShapeType> && !std::is_array_v<typename remove_cvref<ShapeType>::type>, bool> = true>
 auto make_tensor(Storage<T> storage, ShapeType &&shape) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  MATX_LOG_DEBUG("make_tensor<T,ShapeType>(storage, shape): ptr={}", static_cast<const void*>(storage.data()));
 
   constexpr int RANK = static_cast<int>(cuda::std::tuple_size<typename remove_cvref<ShapeType>::type>::value);
   DefaultDescriptor<RANK> desc{std::forward<ShapeType>(shape)};
@@ -89,6 +101,15 @@ void make_tensor( TensorType &tensor,
                   matxMemorySpace_t space = MATX_MANAGED_MEMORY,
                   cudaStream_t stream = 0) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  std::string shape_str = "[";
+  for (int i = 0; i < TensorType::Rank(); i++) {
+    if (i > 0) shape_str += ",";
+    shape_str += std::to_string(shape[i]);
+  }
+  shape_str += "]";
+  MATX_LOG_DEBUG("make_tensor(tensor&, shape, space, stream): shape={}, space={}, stream={}", 
+                 shape_str, static_cast<int>(space), reinterpret_cast<void*>(stream));
 
   auto tmp = make_tensor<typename TensorType::value_type, TensorType::Rank()>(shape, space, stream);
   tensor.Shallow(tmp);
@@ -108,6 +129,15 @@ auto make_tensor_p( const index_t (&shape)[RANK],
                     matxMemorySpace_t space = MATX_MANAGED_MEMORY,
                     cudaStream_t stream = 0) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  std::string shape_str = "[";
+  for (int i = 0; i < RANK; i++) {
+    if (i > 0) shape_str += ",";
+    shape_str += std::to_string(shape[i]);
+  }
+  shape_str += "]";
+  MATX_LOG_DEBUG("make_tensor_p<T,RANK>(shape, space, stream): shape={}, space={}, stream={}", 
+                 shape_str, static_cast<int>(space), reinterpret_cast<void*>(stream));
 
   DefaultDescriptor<RANK> desc{shape};
   auto storage = make_owning_storage<T>(desc.TotalSize(), space, stream);
@@ -134,6 +164,9 @@ auto make_tensor( ShapeType &&shape,
                   matxMemorySpace_t space = MATX_MANAGED_MEMORY,
                   cudaStream_t stream = 0) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  MATX_LOG_DEBUG("make_tensor<T,ShapeType>(shape, space, stream): space={}, stream={}", 
+                 static_cast<int>(space), reinterpret_cast<void*>(stream));
 
   constexpr int rank = static_cast<int>(cuda::std::tuple_size<typename remove_cvref<ShapeType>::type>::value);
   DefaultDescriptor<rank> desc{std::move(shape)};
@@ -165,6 +198,9 @@ auto make_tensor( TensorType &tensor,
                   matxMemorySpace_t space = MATX_MANAGED_MEMORY,
                   cudaStream_t stream = 0) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  MATX_LOG_DEBUG("make_tensor(tensor&, shape, space, stream): space={}, stream={}", 
+                 static_cast<int>(space), reinterpret_cast<void*>(stream));
 
   auto tmp = make_tensor<typename TensorType::value_type, ShapeType>(std::forward<ShapeType>(shape), space, stream);
   tensor.Shallow(tmp);
@@ -189,6 +225,9 @@ auto make_tensor_p( ShapeType &&shape,
                     matxMemorySpace_t space = MATX_MANAGED_MEMORY,
                     cudaStream_t stream = 0) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  MATX_LOG_DEBUG("make_tensor_p<T,ShapeType>(shape, space, stream): space={}, stream={}", 
+                 static_cast<int>(space), reinterpret_cast<void*>(stream));
 
   DefaultDescriptor<static_cast<int>(cuda::std::tuple_size<typename remove_cvref<ShapeType>::type>::value)> desc{std::move(shape)};
 
@@ -212,6 +251,8 @@ template <typename T>
 auto make_tensor( [[maybe_unused]] const std::initializer_list<detail::no_size_t> t,
                   matxMemorySpace_t space = MATX_MANAGED_MEMORY,
                   cudaStream_t stream = 0) {
+  MATX_LOG_DEBUG("make_tensor<T>(0D, space, stream): space={}, stream={}", 
+                 static_cast<int>(space), reinterpret_cast<void*>(stream));
   using shape_t = cuda::std::array<index_t, 0>;
   return make_tensor<T, shape_t>(shape_t{}, space, stream);
 }
@@ -230,6 +271,8 @@ template <typename TensorType,
 auto make_tensor( TensorType &tensor,
                   matxMemorySpace_t space = MATX_MANAGED_MEMORY,
                   cudaStream_t stream = 0) {
+  MATX_LOG_DEBUG("make_tensor(tensor&, 0D, space, stream): space={}, stream={}", 
+                 static_cast<int>(space), reinterpret_cast<void*>(stream));
   auto tmp = make_tensor<typename TensorType::value_type>({}, space, stream);
   tensor.Shallow(tmp);
 }
@@ -247,6 +290,8 @@ template <typename T>
 auto make_tensor_p( [[maybe_unused]] const std::initializer_list<detail::no_size_t> t,
                     matxMemorySpace_t space = MATX_MANAGED_MEMORY,
                     cudaStream_t stream = 0) {
+  MATX_LOG_DEBUG("make_tensor_p<T>(0D, space, stream): space={}, stream={}", 
+                 static_cast<int>(space), reinterpret_cast<void*>(stream));
 
   cuda::std::array<index_t, 0> shape;
   return make_tensor_p<T, decltype(shape)>(std::move(shape), space, stream);
@@ -268,6 +313,15 @@ auto make_tensor( T *data,
                   const index_t (&shape)[RANK],
                   bool owning = false) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  std::string shape_str = "[";
+  for (int i = 0; i < RANK; i++) {
+    if (i > 0) shape_str += ",";
+    shape_str += std::to_string(shape[i]);
+  }
+  shape_str += "]";
+  MATX_LOG_DEBUG("make_tensor<T,RANK>(data, shape, owning): ptr={}, shape={}, owning={}", 
+                 reinterpret_cast<void*>(data), shape_str, owning);
 
   DefaultDescriptor<RANK> desc{shape};
   auto storage = owning ? make_owning_storage<T>(desc.TotalSize()) : make_non_owning_storage<T>(data, desc.TotalSize());
@@ -291,6 +345,15 @@ auto make_tensor( TensorType &tensor,
                   typename TensorType::value_type *data,
                   const index_t (&shape)[TensorType::Rank()]) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  std::string shape_str = "[";
+  for (int i = 0; i < TensorType::Rank(); i++) {
+    if (i > 0) shape_str += ",";
+    shape_str += std::to_string(shape[i]);
+  }
+  shape_str += "]";
+  MATX_LOG_DEBUG("make_tensor(tensor&, data, shape): ptr={}, shape={}", 
+                 reinterpret_cast<void*>(data), shape_str);
 
   auto tmp = make_tensor<typename TensorType::value_type, TensorType::Rank()>(data, shape, false);
   tensor.Shallow(tmp);
@@ -313,6 +376,9 @@ auto make_tensor( T *data,
                   ShapeType &&shape,
                   bool owning = false) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  MATX_LOG_DEBUG("make_tensor<T,ShapeType>(data, shape, owning): ptr={}, owning={}", 
+                 reinterpret_cast<void*>(data), owning);
 
   constexpr int RANK = static_cast<int>(cuda::std::tuple_size<typename remove_cvref<ShapeType>::type>::value);
   DefaultDescriptor<RANK>
@@ -338,6 +404,9 @@ auto make_tensor( TensorType &tensor,
                   typename TensorType::value_type *data,
                   typename TensorType::shape_container &&shape) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  MATX_LOG_DEBUG("make_tensor(tensor&, data, shape): ptr={}", reinterpret_cast<void*>(data));
+  
   auto tmp = make_tensor<typename TensorType::value_type, typename TensorType::shape_container>(data, std::forward<typename TensorType::shape_container>(shape), false);
   tensor.Shallow(tmp);
 }
@@ -356,6 +425,8 @@ template <typename T>
 auto make_tensor( T *ptr,
                   [[maybe_unused]] const std::initializer_list<detail::no_size_t> t,
                   bool owning = false) {
+  MATX_LOG_DEBUG("make_tensor<T>(ptr, 0D, owning): ptr={}, owning={}", 
+                 reinterpret_cast<void*>(ptr), owning);
   cuda::std::array<index_t, 0> shape;
   return make_tensor<T, decltype(shape)>(ptr, std::move(shape), owning);
 }
@@ -373,6 +444,7 @@ template <typename TensorType,
   std::enable_if_t<is_tensor_view_v<TensorType>, bool> = true>
 auto make_tensor( TensorType &tensor,
                   typename TensorType::value_type *ptr) {
+  MATX_LOG_DEBUG("make_tensor(tensor&, ptr, 0D): ptr={}", reinterpret_cast<const void*>(ptr)); // const void*: valid even if value_type is const
   auto tmp = make_tensor<typename TensorType::value_type>(ptr, false);
   tensor.Shallow(tmp);
 }
@@ -396,6 +468,9 @@ auto make_tensor_p( T *const data,
                     ShapeType &&shape,
                     bool owning = false) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  MATX_LOG_DEBUG("make_tensor_p<T,ShapeType>(data, shape, owning): ptr={}, owning={}", 
+                 reinterpret_cast<const void*>(data), owning);
 
   constexpr int RANK = static_cast<int>(cuda::std::tuple_size<typename remove_cvref<ShapeType>::type>::value);
   DefaultDescriptor<RANK>
@@ -417,6 +492,14 @@ template <typename T, int RANK, typename Allocator>
 auto make_tensor( const index_t (&shape)[RANK],
                   Allocator&& alloc) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  std::string shape_str = "["; // shape rendered as "[d0,d1,...]"; built by plain statements ahead of the log macro
+  for (int i = 0; i < RANK; i++) {
+    if (i > 0) shape_str += ","; // separator between dimensions, none before the first
+    shape_str += std::to_string(shape[i]);
+  }
+  shape_str += "]";
+  MATX_LOG_DEBUG("make_tensor<T,RANK,Allocator>(shape, alloc): shape={}", shape_str);
 
   DefaultDescriptor<RANK> desc{shape};
   auto storage = make_owning_storage<T>(desc.TotalSize(), std::forward<Allocator>(alloc));
@@ -438,6 +521,8 @@ template <typename T, typename ShapeType, typename Allocator,
 auto make_tensor( ShapeType &&shape,
                   Allocator&& alloc) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  MATX_LOG_DEBUG("make_tensor<T,ShapeType,Allocator>(shape, alloc)");
 
   constexpr int RANK = static_cast<int>(cuda::std::tuple_size<typename remove_cvref<ShapeType>::type>::value);
   DefaultDescriptor<RANK> desc{std::forward<ShapeType>(shape)};
@@ -461,6 +546,14 @@ void make_tensor( TensorType &tensor,
                   const index_t (&shape)[TensorType::Rank()],
                   Allocator&& alloc) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  std::string shape_str = "["; // shape rendered as "[d0,d1,...]"; built by plain statements ahead of the log macro
+  for (int i = 0; i < TensorType::Rank(); i++) {
+    if (i > 0) shape_str += ","; // separator between dimensions, none before the first
+    shape_str += std::to_string(shape[i]);
+  }
+  shape_str += "]";
+  MATX_LOG_DEBUG("make_tensor(tensor&, shape, alloc): shape={}", shape_str);
 
   auto tmp = make_tensor<typename TensorType::value_type, TensorType::Rank()>(shape, std::forward<Allocator>(alloc));
   tensor.Shallow(tmp);
@@ -483,6 +576,8 @@ void make_tensor( TensorType &tensor,
                   ShapeType &&shape,
                   Allocator&& alloc) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  MATX_LOG_DEBUG("make_tensor(tensor&, shape, alloc)");
 
   auto tmp = make_tensor<typename TensorType::value_type>(std::forward<ShapeType>(shape), std::forward<Allocator>(alloc));
   tensor.Shallow(tmp);
@@ -505,6 +600,9 @@ auto make_tensor( T* const data,
                   D &&desc,
                   bool owning = false) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  MATX_LOG_DEBUG("make_tensor<T,D>(data, desc, owning): ptr={}, owning={}", 
+                 reinterpret_cast<const void*>(data), owning);
 
   using Dstrip = typename remove_cvref<D>::type;
   auto storage = owning ? make_owning_storage<T>(desc.TotalSize()) : make_non_owning_storage<T>(data, desc.TotalSize());
@@ -528,6 +626,8 @@ auto make_tensor( TensorType &tensor,
                   typename TensorType::value_type* const data,
                   typename TensorType::desc_type &&desc) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  MATX_LOG_DEBUG("make_tensor(tensor&, data, desc): ptr={}", reinterpret_cast<const void*>(data));
 
   // This tensor should be non-owning regardless of the original ownership since it will go out of scope at the end of the function
   auto tmp = make_tensor<typename TensorType::value_type, typename TensorType::desc_type>(data, std::forward<typename TensorType::desc_type>(desc), false);
@@ -547,6 +647,9 @@ auto make_tensor( D &&desc,
                   matxMemorySpace_t space = MATX_MANAGED_MEMORY,
                   cudaStream_t stream = 0) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  MATX_LOG_DEBUG("make_tensor<T,D>(desc, space, stream): space={}, stream={}", 
+                 static_cast<int>(space), reinterpret_cast<void*>(stream));
 
   using Dstrip = typename remove_cvref<D>::type;
 
@@ -570,6 +673,9 @@ auto make_tensor( TensorType &&tensor,
                   matxMemorySpace_t space = MATX_MANAGED_MEMORY,
                   cudaStream_t stream = 0) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  MATX_LOG_DEBUG("make_tensor(tensor&&, desc, space, stream): space={}, stream={}", 
+                 static_cast<int>(space), reinterpret_cast<void*>(stream));
 
   auto tmp = make_tensor<typename TensorType::value_type, typename TensorType::desc_type>(std::forward<typename TensorType::desc_type>(desc), space, stream);
   tensor.Shallow(tmp);
@@ -594,6 +700,18 @@ auto make_tensor( T *const data,
                   const index_t (&strides)[RANK],
                   bool owning = false) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  std::string shape_str = "["; // shape rendered as "[d0,d1,...]"; built by plain statements ahead of the log macro
+  std::string strides_str = "["; // strides rendered the same way, filled in the same loop
+  for (int i = 0; i < RANK; i++) {
+    if (i > 0) { shape_str += ","; strides_str += ","; } // separators, none before the first entry
+    shape_str += std::to_string(shape[i]);
+    strides_str += std::to_string(strides[i]);
+  }
+  shape_str += "]";
+  strides_str += "]";
+  MATX_LOG_DEBUG("make_tensor<T,RANK>(data, shape, strides, owning): ptr={}, shape={}, strides={}, owning={}", 
+                 reinterpret_cast<const void*>(data), shape_str, strides_str, owning);
 
   DefaultDescriptor<RANK>  desc{shape, strides};
   auto storage = owning ? make_owning_storage<T>(desc.TotalSize()) : make_non_owning_storage<T>(data, desc.TotalSize());
@@ -620,6 +738,18 @@ auto make_tensor( TensorType &tensor,
                   const index_t (&shape)[TensorType::Rank()],
                   const index_t (&strides)[TensorType::Rank()]) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  std::string shape_str = "["; // shape rendered as "[d0,d1,...]"; built by plain statements ahead of the log macro
+  std::string strides_str = "["; // strides rendered the same way, filled in the same loop
+  for (int i = 0; i < TensorType::Rank(); i++) {
+    if (i > 0) { shape_str += ","; strides_str += ","; } // separators, none before the first entry
+    shape_str += std::to_string(shape[i]);
+    strides_str += std::to_string(strides[i]);
+  }
+  shape_str += "]";
+  strides_str += "]";
+  MATX_LOG_DEBUG("make_tensor(tensor&, data, shape, strides): ptr={}, shape={}, strides={}", 
+                 reinterpret_cast<const void*>(data), shape_str, strides_str);
 
   auto tmp = make_tensor<typename TensorType::value_type, TensorType::Rank()>(data, shape, strides, false);
   tensor.Shallow(tmp);
@@ -633,6 +763,8 @@ auto make_tensor( TensorType &tensor,
 template <typename T, index_t I, index_t ...Is>
 auto make_static_tensor() {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  MATX_LOG_DEBUG("make_static_tensor<T,I,Is...>()");
 
   static_tensor_desc_t<I, Is...> desc{};
   auto storage = make_owning_storage<T>(desc.TotalSize());
@@ -644,6 +776,8 @@ template <typename TensorType,
 auto make_tensor( TensorType &tensor,
                   const DLManagedTensor dlp_tensor) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  MATX_LOG_DEBUG("make_tensor(tensor&, DLManagedTensor): ptr={}", dlp_tensor.dl_tensor.data);
 
   using T = typename TensorType::value_type;
   const DLTensor &dt = dlp_tensor.dl_tensor;
diff --git a/include/matx/executors/cuda.h b/include/matx/executors/cuda.h
index 82757d82f..5cb15b6b8 100644
--- a/include/matx/executors/cuda.h
+++ b/include/matx/executors/cuda.h
@@ -38,6 +38,7 @@
 #include "matx/core/get_grid_dims.h"
 #include "matx/executors/kernel.h"
 #include "matx/executors/cuda_executor_common.h"
+#include "matx/core/log.h"
 #include <cuda/std/array>
 #include <utility>
 #include <vector>
@@ -269,6 +270,9 @@ namespace matx
             // Helper lambda to launch kernel
             auto launch_kernel = [&]<detail::ElementsPerThread EPT>() {
               dispatch_kernel.template operator()<EPT>([&](auto launch_func) {
+                MATX_LOG_DEBUG("Launching CUDA kernel: rank={}, blocks=({},{},{}), threads=({},{},{}), EPT={}, stream={}", 
+                               Op::Rank(), blocks.x, blocks.y, blocks.z, threads.x, threads.y, threads.z, 
+                               static_cast<int>(EPT), reinterpret_cast<void*>(stream_));
                 launch_func();
               });
             };
diff --git a/include/matx/generators/alternate.h b/include/matx/generators/alternate.h
index bf41ba780..9d185cb39 100644
--- a/include/matx/generators/alternate.h
+++ b/include/matx/generators/alternate.h
@@ -33,6 +33,7 @@
 #pragma once
 
 #include "matx/generators/generator1d.h"
+#include "matx/core/log.h"
 
 namespace matx
 {
@@ -47,7 +48,11 @@ namespace matx
         using matxop = bool;        
 
 	      __MATX_INLINE__ std::string str() const { return "alternate"; }
-        __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ Alternating(index_t size) : size_(size) {};
+        __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ Alternating(index_t size) : size_(size) {
+#ifndef __CUDA_ARCH__
+          MATX_LOG_TRACE("Alternating constructor: size={}", size);
+#endif
+        };
 
         template <OperatorCapability Cap, typename InType>
         __MATX_INLINE__ __MATX_HOST__ auto get_capability([[maybe_unused]] InType &in) const {
diff --git a/include/matx/generators/bartlett.h b/include/matx/generators/bartlett.h
index 3351c9d96..d72d195e2 100644
--- a/include/matx/generators/bartlett.h
+++ b/include/matx/generators/bartlett.h
@@ -33,6 +33,7 @@
 #pragma once
 
 #include "matx/generators/generator1d.h"
+#include "matx/core/log.h"
 #include <type_traits>
 
 namespace matx
@@ -48,7 +49,11 @@ namespace matx
 
         __MATX_INLINE__ std::string str() const { return "bartlett"; }
 
-        inline __MATX_HOST__ __MATX_DEVICE__ Bartlett(index_t size) : size_(size){};
+        inline __MATX_HOST__ __MATX_DEVICE__ Bartlett(index_t size) : size_(size){
+#ifndef __CUDA_ARCH__
+          MATX_LOG_TRACE("Bartlett constructor: size={}", size);
+#endif
+        };
 
         template <typename CapType>
         inline __MATX_HOST__ __MATX_DEVICE__ auto operator()(index_t i) const
diff --git a/include/matx/generators/blackman.h b/include/matx/generators/blackman.h
index dc4aad33f..27b9b1fa8 100644
--- a/include/matx/generators/blackman.h
+++ b/include/matx/generators/blackman.h
@@ -33,6 +33,7 @@
 #pragma once
 
 #include "matx/generators/generator1d.h"
+#include "matx/core/log.h"
 
 namespace matx
 {
@@ -47,7 +48,11 @@ namespace matx
 
         __MATX_INLINE__ std::string str() const { return "blackman"; }
 
-        __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ Blackman(index_t size) : size_(size){};
+        __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ Blackman(index_t size) : size_(size){
+#ifndef __CUDA_ARCH__
+          MATX_LOG_TRACE("Blackman constructor: size={}", size);
+#endif
+        };
 
         template <typename CapType>
         __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ auto operator()(index_t i) const
diff --git a/include/matx/generators/chirp.h b/include/matx/generators/chirp.h
index 768fa4e79..06f021e85 100644
--- a/include/matx/generators/chirp.h
+++ b/include/matx/generators/chirp.h
@@ -33,6 +33,7 @@
 #pragma once
 
 #include "matx/generators/linspace.h"
+#include "matx/core/log.h"
 
 namespace matx
 {
@@ -70,7 +71,11 @@ namespace matx
           f1_(f1),          
           t1_(t1),
           method_(method)
-        {}
+        {
+#ifndef __CUDA_ARCH__
+          MATX_LOG_TRACE("Chirp constructor: f0={}, f1={}, t1={}", f0, f1, t1);
+#endif
+        }
 
         template <OperatorCapability Cap, typename InType>
         __MATX_INLINE__ __MATX_HOST__ auto get_capability([[maybe_unused]] InType &in) const {
@@ -131,7 +136,11 @@ namespace matx
           f1_(f1),
           t1_(t1),          
           method_(method)
-        {}
+        {
+#ifndef __CUDA_ARCH__
+          MATX_LOG_TRACE("ComplexChirp constructor: f0={}, f1={}, t1={}", f0, f1, t1);
+#endif
+        }
 
         template <OperatorCapability Cap, typename InType>
         __MATX_INLINE__ __MATX_HOST__ auto get_capability([[maybe_unused]] InType &in) const {
diff --git a/include/matx/generators/diag.h b/include/matx/generators/diag.h
index 83600a532..b3dece465 100644
--- a/include/matx/generators/diag.h
+++ b/include/matx/generators/diag.h
@@ -32,6 +32,7 @@
 
 #pragma once
 
+#include "matx/core/log.h"
 
 namespace matx
 {
@@ -56,6 +57,7 @@ namespace matx
         if constexpr (!is_noshape_v<ShapeType>) {
           static_assert(Rank() > 1, "Diagonal generator must be used with an operator of rank 1 or higher");
         }
+        MATX_LOG_TRACE("Diag constructor: rank={}, val={}", Rank(), val);
       };
 
       template <OperatorCapability Cap, typename InType>
diff --git a/include/matx/generators/fftfreq.h b/include/matx/generators/fftfreq.h
index d9d595199..d60e9dd4e 100644
--- a/include/matx/generators/fftfreq.h
+++ b/include/matx/generators/fftfreq.h
@@ -33,6 +33,7 @@
 #pragma once
 
 #include "matx/generators/generator1d.h"
+#include "matx/core/log.h"
 
 namespace matx
 {
@@ -52,6 +53,7 @@ namespace matx
         {
           n_ = n;
           d_ = d;
+          MATX_LOG_TRACE("FFTFreqOp constructor: n={}, d={}", n, d);
         }
 
         template <typename CapType>
diff --git a/include/matx/generators/flattop.h b/include/matx/generators/flattop.h
index 62943b7f5..295fb9a3c 100644
--- a/include/matx/generators/flattop.h
+++ b/include/matx/generators/flattop.h
@@ -33,6 +33,7 @@
 #pragma once
 
 #include "matx/generators/generator1d.h"
+#include "matx/core/log.h"
 #include <type_traits>
 
 namespace matx
@@ -54,7 +55,11 @@ namespace matx
 
         __MATX_INLINE__ std::string str() const { return "flattop"; }
 
-        inline __MATX_HOST__ __MATX_DEVICE__ FlatTop(index_t size) : size_(size){};
+        inline __MATX_HOST__ __MATX_DEVICE__ FlatTop(index_t size) : size_(size){
+#ifndef __CUDA_ARCH__
+          MATX_LOG_TRACE("FlatTop constructor: size={}", size);
+#endif
+        };
 
         template <typename CapType>
         inline __MATX_HOST__ __MATX_DEVICE__ auto operator()(index_t i) const
diff --git a/include/matx/generators/hamming.h b/include/matx/generators/hamming.h
index 6a032a142..da7ef2975 100644
--- a/include/matx/generators/hamming.h
+++ b/include/matx/generators/hamming.h
@@ -33,6 +33,7 @@
 #pragma once
 
 #include "matx/generators/generator1d.h"
+#include "matx/core/log.h"
 #include <type_traits>
 
 namespace matx
@@ -48,7 +49,11 @@ namespace matx
 
         __MATX_INLINE__ std::string str() const { return "hamming"; }
 	
-        inline __MATX_HOST__ __MATX_DEVICE__ Hamming(index_t size) : size_(size){};
+        inline __MATX_HOST__ __MATX_DEVICE__ Hamming(index_t size) : size_(size){
+#ifndef __CUDA_ARCH__
+          MATX_LOG_TRACE("Hamming constructor: size={}", size);
+#endif
+        };
 
         template <typename CapType>
         inline __MATX_HOST__ __MATX_DEVICE__ auto operator()(index_t i) const 
diff --git a/include/matx/generators/hanning.h b/include/matx/generators/hanning.h
index d40a85d07..2a291d0de 100644
--- a/include/matx/generators/hanning.h
+++ b/include/matx/generators/hanning.h
@@ -33,6 +33,7 @@
 #pragma once
 
 #include "matx/generators/generator1d.h"
+#include "matx/core/log.h"
 #include <type_traits>
 
 namespace matx
@@ -48,7 +49,11 @@ namespace matx
 
         __MATX_INLINE__ std::string str() const { return "hanning"; }
 
-        inline __MATX_HOST__ __MATX_DEVICE__ Hanning(index_t size) : size_(size){};
+        inline __MATX_HOST__ __MATX_DEVICE__ Hanning(index_t size) : size_(size){
+#ifndef __CUDA_ARCH__
+          MATX_LOG_TRACE("Hanning constructor: size={}", size);
+#endif
+        };
 
         template <typename CapType>
         inline __MATX_HOST__ __MATX_DEVICE__ auto operator()(index_t i) const
diff --git a/include/matx/generators/linspace.h b/include/matx/generators/linspace.h
index bd9dbadb8..966e9a719 100644
--- a/include/matx/generators/linspace.h
+++ b/include/matx/generators/linspace.h
@@ -33,6 +33,7 @@
 #pragma once
 
 #include "matx/generators/range.h"
+#include "matx/core/log.h"
 
 namespace matx
 {
@@ -60,6 +61,7 @@ namespace matx
 
         inline LinspaceOp(const T (&firsts)[NUM_RC], const T (&lasts)[NUM_RC], index_t count, int axis) 
         {
+          MATX_LOG_TRACE("LinspaceOp constructor: NUM_RC={}, count={}, axis={}", NUM_RC, count, axis);
           axis_ = axis;
           count_ = count;
           for (int i = 0; i < NUM_RC; ++i) {
diff --git a/include/matx/generators/logspace.h b/include/matx/generators/logspace.h
index 45cfc2b52..12a0862ba 100644
--- a/include/matx/generators/logspace.h
+++ b/include/matx/generators/logspace.h
@@ -33,6 +33,7 @@
 #pragma once
 
 #include "matx/generators/generator1d.h"
+#include "matx/core/log.h"
 #include <type_traits>
 
 namespace matx
@@ -67,6 +68,7 @@ namespace matx
           else {
             range_ = Range<T>{first, (last - first) / static_cast<T>(count - 1)};
           }
+          MATX_LOG_TRACE("Logspace constructor: first={}, last={}, count={}", first, last, count);
 #endif
         }
 
diff --git a/include/matx/generators/meshgrid.h b/include/matx/generators/meshgrid.h
index 67e1ba1e4..9453e913a 100644
--- a/include/matx/generators/meshgrid.h
+++ b/include/matx/generators/meshgrid.h
@@ -32,6 +32,7 @@
 
 #pragma once
 #include "matx/operators/permute.h"
+#include "matx/core/log.h"
 
 namespace matx
 {
@@ -55,6 +56,7 @@ namespace matx
           __MATX_INLINE__ MeshGridOp(T1 t1, cuda::std::array<index_t, RANK> shape, int idx) : t1_(t1), shape_(shape), idx_(idx) {
             static_assert(shape.size() == RANK );
             static_assert(is_matx_op<T1>());
+            MATX_LOG_TRACE("MeshGridOp constructor: rank={}, axis={}, idx={}", RANK, AXIS, idx);
           }
 
 
diff --git a/include/matx/generators/random.h b/include/matx/generators/random.h
index d8389b2e8..03e1be8d8 100644
--- a/include/matx/generators/random.h
+++ b/include/matx/generators/random.h
@@ -33,6 +33,7 @@
 #pragma once
 
 #include "matx/core/error.h"
+#include "matx/core/log.h"
 #include <cuda/std/complex>
 #include <curand_kernel.h>
 #include <type_traits>
@@ -279,6 +280,8 @@ namespace detail {
         for (int i = RANK - 2; i >= 0; i--) {
           strides_[i] = strides_[i+1] * s[i+1];
         }
+        
+        MATX_LOG_TRACE("RandomOp constructor: rank={}, total_size={}, seed={}", RANK, total_size_, seed);
       }
 
       template <OperatorCapability Cap, typename InType>
diff --git a/include/matx/generators/range.h b/include/matx/generators/range.h
index 01d817eb0..bcd21a619 100644
--- a/include/matx/generators/range.h
+++ b/include/matx/generators/range.h
@@ -33,6 +33,7 @@
 #pragma once
 
 #include "matx/generators/generator1d.h"
+#include "matx/core/log.h"
 #include <type_traits>
 
 namespace matx
@@ -51,7 +52,9 @@ namespace matx
 
         __MATX_INLINE__ std::string str() const { return "range"; }
 
-        Range(T first, T step) : first_(first), step_(step) {}
+        Range(T first, T step) : first_(first), step_(step) {
+          MATX_LOG_TRACE("Range constructor: first={}, step={}", first, step);
+        }
 
         template <typename CapType>
         __MATX_DEVICE__ __MATX_HOST__ __MATX_INLINE__ auto operator()(index_t idx) const
diff --git a/include/matx/operators/all.h b/include/matx/operators/all.h
index 9490d7720..04cbdb652 100644
--- a/include/matx/operators/all.h
+++ b/include/matx/operators/all.h
@@ -60,6 +60,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "all(" + get_type_str(a_) + ")"; }
       __MATX_INLINE__ AllOp(const OpA &a) : a_(a) { 
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
         for (int r = 0; r < ORank; r++) {
           out_dims_[r] = a_.Size(r);
         }
diff --git a/include/matx/operators/ambgfun.h b/include/matx/operators/ambgfun.h
index 0ca5559d6..43add18fc 100644
--- a/include/matx/operators/ambgfun.h
+++ b/include/matx/operators/ambgfun.h
@@ -74,7 +74,7 @@ namespace matx
 
         __MATX_INLINE__ AmbgFunOp(const OpX &x, const OpY &y, double fs, AMBGFunCutType_t cut, float cut_val) : 
               x_(x), y_(y), fs_(fs), cut_(cut), cut_val_(cut_val) {
-          
+          MATX_LOG_TRACE("{} constructor: fs={}, cut={}", str(), fs, static_cast<int>(cut));
           static_assert(OpX::Rank() == 1, "Input to ambgfun must be rank 1");                
           if (cut == AMBGFUN_CUT_TYPE_2D) {
             out_dims_[0] = 2 * x_.Size(0) - 1;
diff --git a/include/matx/operators/any.h b/include/matx/operators/any.h
index 9cac0f81a..3183b08a9 100644
--- a/include/matx/operators/any.h
+++ b/include/matx/operators/any.h
@@ -60,6 +60,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "any(" + get_type_str(a_) + ")"; }
       __MATX_INLINE__ AnyOp(const OpA &a) : a_(a) { 
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
         for (int r = 0; r < ORank; r++) {
           out_dims_[r] = a_.Size(r);
         }        
diff --git a/include/matx/operators/apply.h b/include/matx/operators/apply.h
index 2579f8f03..05e8555cd 100644
--- a/include/matx/operators/apply.h
+++ b/include/matx/operators/apply.h
@@ -59,6 +59,7 @@ namespace matx
 
         __MATX_INLINE__ ApplyOp(Func func, const Ops&... ops) : func_(func), ops_(detail::base_type_t<Ops>(ops)...)
         {
+          MATX_LOG_TRACE("{} constructor: num_ops={}", str(), sizeof...(Ops));
           static_assert(sizeof...(Ops) > 0, "ApplyOp requires at least one input operator");
           
           // Initialize sizes from the first operator
diff --git a/include/matx/operators/apply_idx.h b/include/matx/operators/apply_idx.h
index 13adc09c9..9a8d09c3d 100644
--- a/include/matx/operators/apply_idx.h
+++ b/include/matx/operators/apply_idx.h
@@ -63,6 +63,7 @@ namespace matx
 
         __MATX_INLINE__ ApplyIdxOp(Func func, const Ops&... ops) : func_(func), ops_(detail::base_type_t<Ops>(ops)...)
         {
+          MATX_LOG_TRACE("{} constructor: num_ops={}", str(), sizeof...(Ops));
           static_assert(sizeof...(Ops) > 0, "ApplyIdxOp requires at least one input operator");
           
           // Initialize sizes from the first operator
diff --git a/include/matx/operators/argmax.h b/include/matx/operators/argmax.h
index 82f0d2019..57ff01c6a 100644
--- a/include/matx/operators/argmax.h
+++ b/include/matx/operators/argmax.h
@@ -57,7 +57,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "argmax(" + get_type_str(a_) + ")"; }
       __MATX_INLINE__ ArgMaxOp(const OpA &a) : a_(a) { 
-     
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
       };
 
       template <typename... Is>
diff --git a/include/matx/operators/argmin.h b/include/matx/operators/argmin.h
index b8a203704..5bdce769e 100644
--- a/include/matx/operators/argmin.h
+++ b/include/matx/operators/argmin.h
@@ -56,7 +56,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "argmin(" + get_type_str(a_) + ")"; }
       __MATX_INLINE__ ArgMinOp(const OpA &a) : a_(a) {
-     
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
       };
 
       template <typename... Is>
diff --git a/include/matx/operators/argminmax.h b/include/matx/operators/argminmax.h
index 4f395cb99..8a978d067 100644
--- a/include/matx/operators/argminmax.h
+++ b/include/matx/operators/argminmax.h
@@ -57,7 +57,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "argminmax(" + get_type_str(a_) + ")"; }
       __MATX_INLINE__ ArgMinMaxOp(const OpA &a) : a_(a) {
-     
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
       };
 
       template <typename... Is>
diff --git a/include/matx/operators/argsort.h b/include/matx/operators/argsort.h
index a733a2daf..98d3020bc 100644
--- a/include/matx/operators/argsort.h
+++ b/include/matx/operators/argsort.h
@@ -61,6 +61,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "argsort()"; }
       __MATX_INLINE__ ArgsortOp(const OpA &a, const SortDirection_t dir) : a_(a), dir_(dir) { 
+        MATX_LOG_TRACE("{} constructor: rank={}, dir={}", str(), Rank(), static_cast<int>(dir));
         for (int r = 0; r < Rank(); r++) {
           out_dims_[r] = a_.Size(r);
         }
diff --git a/include/matx/operators/at.h b/include/matx/operators/at.h
index 4ee004799..2522414ae 100644
--- a/include/matx/operators/at.h
+++ b/include/matx/operators/at.h
@@ -53,7 +53,9 @@ namespace matx
         using value_type = typename Op::value_type;
 
         __MATX_INLINE__ std::string str() const { return "at()"; }
-        __MATX_INLINE__ AtOp(const Op &op, Is... is) : op_(op), idx_{is...} {};
+        __MATX_INLINE__ AtOp(const Op &op, Is... is) : op_(op), idx_{is...} {
+          MATX_LOG_TRACE("{} constructor: num_indices={}", str(), sizeof...(Is));
+        };
 
         template <typename CapType, typename... Is2>
         __MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) operator()([[maybe_unused]] Is2... indices) const
diff --git a/include/matx/operators/base_operator.h b/include/matx/operators/base_operator.h
index 393c3e76b..5a4e30e6a 100644
--- a/include/matx/operators/base_operator.h
+++ b/include/matx/operators/base_operator.h
@@ -38,6 +38,7 @@
 #include "matx/core/operator_utils.h"
 #include "matx/core/capabilities.h"
 #include "matx/core/error.h"
+#include "matx/core/log.h"
 
 namespace matx
 {
diff --git a/include/matx/operators/binary_operators.h b/include/matx/operators/binary_operators.h
index 1aec8d316..21343d932 100644
--- a/include/matx/operators/binary_operators.h
+++ b/include/matx/operators/binary_operators.h
@@ -123,6 +123,7 @@ namespace matx
 
       __MATX_INLINE__ matxBinaryOp(const I1 &in1, const I2 &in2, const Op &op) : in1_(in1), in2_(in2), op_(op)
       {
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
         if constexpr (Rank() > 0)
         {
           MATX_ASSERT_COMPATIBLE_OP_SIZES(in1_);
diff --git a/include/matx/operators/cart2sph.h b/include/matx/operators/cart2sph.h
index ebbf45c37..470832b90 100644
--- a/include/matx/operators/cart2sph.h
+++ b/include/matx/operators/cart2sph.h
@@ -59,6 +59,7 @@ namespace matx
 
         __MATX_INLINE__ Cart2SphOp(const T1 &x, const T2 &y, const T3 &z) : x_(x), y_(y), z_(z)
       {
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
         MATX_ASSERT_COMPATIBLE_OP_SIZES(x);
         MATX_ASSERT_COMPATIBLE_OP_SIZES(y);
         MATX_ASSERT_COMPATIBLE_OP_SIZES(z);
diff --git a/include/matx/operators/cast.h b/include/matx/operators/cast.h
index 430f5ae43..aba8047cf 100644
--- a/include/matx/operators/cast.h
+++ b/include/matx/operators/cast.h
@@ -71,7 +71,9 @@ namespace matx
         using value_type = NewType;
 
 	      __MATX_INLINE__ std::string str() const { return as_type_str<NewType>() + "(" + op_.str() + ")"; }
-        __MATX_INLINE__ CastOp(const T &op) : op_(op){};
+        __MATX_INLINE__ CastOp(const T &op) : op_(op){
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
+        };
 
         template <typename CapType, typename... Is>
         __MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) operator()(Is... indices) const
@@ -137,6 +139,7 @@ namespace matx
 
 	      __MATX_INLINE__ std::string str() const { return as_type_str<NewType>() + "(" + real_op_.str() + "," + imag_op_.str() + ")"; }
         __MATX_INLINE__ ComplexCastOp(T1 real_op, T2 imag_op) : real_op_(real_op), imag_op_(imag_op) {
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
           static_assert(detail::get_rank<T1>() == detail::get_rank<T2>(), "rank of real and imaginary operators must match");
           if (real_op_.Shape() != imag_op_.Shape()) {
             MATX_THROW(matxInvalidSize, "ComplexCastOp: sizes of input operators must match in all dimensions");
diff --git a/include/matx/operators/cgsolve.h b/include/matx/operators/cgsolve.h
index 91b109bc4..a42c61531 100644
--- a/include/matx/operators/cgsolve.h
+++ b/include/matx/operators/cgsolve.h
@@ -65,7 +65,7 @@ namespace matx
 
         __MATX_INLINE__ CGSolveOp(const OpA &A, const OpB &B, double tol, int max_iters) : 
               a_(A), b_(B), tol_(tol), max_iters_(max_iters) {
-          
+          MATX_LOG_TRACE("{} constructor: tol={}, max_iters={}", str(), tol, max_iters);
           for (int r = 0; r < Rank(); r++) {
             out_dims_[r] = b_.Size(r);
           }
diff --git a/include/matx/operators/channelize_poly.h b/include/matx/operators/channelize_poly.h
index 40e71ebd8..b66288360 100644
--- a/include/matx/operators/channelize_poly.h
+++ b/include/matx/operators/channelize_poly.h
@@ -68,7 +68,8 @@ namespace detail {
       __MATX_INLINE__ std::string str() const { return "channelize_poly(" + get_type_str(a_) + "," + get_type_str(f_) + ")";}
       __MATX_INLINE__ ChannelizePolyOp(const OpA &a, const FilterType &f, index_t num_channels, index_t decimation_factor) :
           a_(a), f_(f), num_channels_(num_channels), decimation_factor_(decimation_factor)
-      { 
+      {
+        MATX_LOG_TRACE("{} constructor: num_channels={}, decimation_factor={}", str(), num_channels, decimation_factor); 
         const index_t b_len = (a_.Size(OpA::Rank() - 1) + num_channels - 1) / num_channels;
 
         for (int r = 0; r < OpA::Rank()-1; r++) {
diff --git a/include/matx/operators/chol.h b/include/matx/operators/chol.h
index 8c57f8c93..cb7655ed6 100644
--- a/include/matx/operators/chol.h
+++ b/include/matx/operators/chol.h
@@ -61,7 +61,9 @@ namespace detail {
       using can_alias = bool; // Chol is allowed to use the same input/output memory
 
       __MATX_INLINE__ std::string str() const { return "chol()"; }
-      __MATX_INLINE__ CholOp(const OpA &a, SolverFillMode uplo) : a_(a), uplo_(uplo) { }
+      __MATX_INLINE__ CholOp(const OpA &a, SolverFillMode uplo) : a_(a), uplo_(uplo) {
+        MATX_LOG_TRACE("{} constructor: uplo={}", str(), static_cast<int>(uplo));
+      }
 
       // This should never be called
       template <typename... Is>
diff --git a/include/matx/operators/clone.h b/include/matx/operators/clone.h
index ed29e950f..339405798 100644
--- a/include/matx/operators/clone.h
+++ b/include/matx/operators/clone.h
@@ -83,7 +83,7 @@ MATX_IGNORE_WARNING_POP_GCC
             }
           }
           MATX_ASSERT(d == T::Rank(), matxInvalidDim);
-
+          MATX_LOG_TRACE("{} constructor: input_rank={}, output_rank={}", str(), T::Rank(), CRank);
         }
 
         template <typename CapType, typename Op, typename Dims, typename... Is>
diff --git a/include/matx/operators/collapse.h b/include/matx/operators/collapse.h
index c3afc0625..8e3d153d4 100644
--- a/include/matx/operators/collapse.h
+++ b/include/matx/operators/collapse.h
@@ -56,6 +56,7 @@ namespace matx
         {
           static_assert(DIM <= T1::Rank(),  "Collapse DIM must be less than or equal to Rank() of operator");
           static_assert(DIM > 1, "Must collapse multiple dims");
+          MATX_LOG_TRACE("{} constructor: input_rank={}, output_rank={}", str(), T1::Rank(), T1::Rank() - DIM + 1);
           static_assert(T1::Rank() >= 2, "Collapse must be called on operators with rank >= 2");
 
           // compute size of collapsed dimension
diff --git a/include/matx/operators/comma.h b/include/matx/operators/comma.h
index 7823cee38..005c5b1ef 100644
--- a/include/matx/operators/comma.h
+++ b/include/matx/operators/comma.h
@@ -49,6 +49,7 @@ namespace matx
       class CommaOp : public BaseOp<CommaOp<Op1, Op2>>{
         public:
           __MATX_HOST__ __MATX_INLINE__  CommaOp(const Op1 &op1, const Op2 &op2) : op1_(op1), op2_(op2) {
+            MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
             MATX_STATIC_ASSERT_STR(Op1::Rank() == Op2::Rank(), matxInvalidSize, 
                 "Chained expressions using the comma operator must match in rank");
             if constexpr ( Rank() > 0) {
diff --git a/include/matx/operators/concat.h b/include/matx/operators/concat.h
index f4a35a2e8..3f7d3c792 100644
--- a/include/matx/operators/concat.h
+++ b/include/matx/operators/concat.h
@@ -79,6 +79,7 @@ namespace matx
         static_assert(RANK > 0, "Cannot concatenate rank-0 tensors");
         static_assert(sizeof...(Ts) > 1, "Must have more than one tensor to concatenate");
         static_assert((... && (RANK == Ts::Rank())), "concatenated ops must have the same rank");
+        MATX_LOG_TRACE("{} constructor: rank={}, axis={}, num_tensors={}", str(), RANK, axis, sizeof...(Ts));
 
         for (int32_t i = 0; i < RANK; i++) {
           if(i == axis_) {
diff --git a/include/matx/operators/conv.h b/include/matx/operators/conv.h
index f40bc64eb..de96de60c 100644
--- a/include/matx/operators/conv.h
+++ b/include/matx/operators/conv.h
@@ -75,7 +75,7 @@ namespace matx
 
         __MATX_INLINE__ Conv1DOp(const OpA &A, const OpB &B, matxConvCorrMode_t mode, matxConvCorrMethod_t method, PermDims perm) : 
               a_(A), b_(B), mode_(mode), method_(method), perm_(perm) {
-
+          MATX_LOG_TRACE("{} constructor: mode={}, method={}", str(), static_cast<int>(mode), static_cast<int>(method));
           MATX_ASSERT_STR((!is_matx_type_v<typename OpA::value_type> && !is_matx_type_v<typename OpB::value_type>) || 
                           method == MATX_C_METHOD_DIRECT, 
             matxInvalidType, "FFT convolutions do not support half precision float currently");
@@ -304,7 +304,7 @@ namespace detail {
 
       __MATX_INLINE__ Conv2DOp(const OpA &A, const OpB &B, matxConvCorrMode_t mode, PermDims perm) : 
             a_(A), b_(B), mode_(mode), perm_(perm) {
-
+        MATX_LOG_TRACE("{} constructor: mode={}", str(), static_cast<int>(mode));
         // Currently when using the axis parameter the rank of inputs must be equal
         if constexpr (!std::is_same_v<PermDims, no_permute_t>) {
           for (int r = 0; r < Rank(); r++) {
diff --git a/include/matx/operators/corr.h b/include/matx/operators/corr.h
index 29e9a73e3..7d72e63a0 100644
--- a/include/matx/operators/corr.h
+++ b/include/matx/operators/corr.h
@@ -71,7 +71,7 @@ namespace matx
 
         __MATX_INLINE__ CorrOp(const OpA &A, const OpB &B, matxConvCorrMode_t mode, [[maybe_unused]] matxConvCorrMethod_t method, PermDims perm) : 
               a_(A), b_(B), mode_(mode), method_(method), perm_(perm) {
-
+          MATX_LOG_TRACE("{} constructor: mode={}, method={}", str(), static_cast<int>(mode), static_cast<int>(method));
           // Currently when using the axis parameter the rank of inputs must be equal
           if constexpr (!std::is_same_v<PermDims, no_permute_t>) {
             for (int r = 0; r < Rank(); r++) {
diff --git a/include/matx/operators/cov.h b/include/matx/operators/cov.h
index bb3f14410..4f16c6eae 100644
--- a/include/matx/operators/cov.h
+++ b/include/matx/operators/cov.h
@@ -62,7 +62,7 @@ namespace matx
 
         __MATX_INLINE__ CovOp(const OpA &A) : 
               a_(A) {
-          
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
           for (int r = 0; r < Rank(); r++) {
             out_dims_[r] = a_.Size(r);
           }
diff --git a/include/matx/operators/cross.h b/include/matx/operators/cross.h
index 65033ce5e..a4f5c5f0d 100644
--- a/include/matx/operators/cross.h
+++ b/include/matx/operators/cross.h
@@ -66,6 +66,7 @@ namespace matx
 
         __MATX_INLINE__ std::string str() const { return "cross()"; }
         __MATX_INLINE__ CrossOp(const OpA &A, const OpB &B) : a_(A), b_(B) {
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
           MATX_STATIC_ASSERT_STR(OpA::Rank() >= 1 && OpB::Rank() >= 1, matxInvalidDim, "Operators to cross() must have rank GTE one.");
 
           //dims other than the last are batched, so count R-->L, beginning one-left of the right-most dim
diff --git a/include/matx/operators/cumsum.h b/include/matx/operators/cumsum.h
index 1e3d70dc1..0d82d6a45 100644
--- a/include/matx/operators/cumsum.h
+++ b/include/matx/operators/cumsum.h
@@ -64,6 +64,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "cumsum()"; }
       __MATX_INLINE__ CumSumOp(const OpA &a) : a_(a) { 
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
         for (int r = 0; r < Rank(); r++) {
           out_dims_[r] = a_.Size(r);
         }
diff --git a/include/matx/operators/dct.h b/include/matx/operators/dct.h
index 8a3ea10e5..87d63c547 100644
--- a/include/matx/operators/dct.h
+++ b/include/matx/operators/dct.h
@@ -54,7 +54,9 @@ template <typename I, typename Out> class dctOp : public BaseOp<dctOp<I, Out>> {
   index_t N_;
 
 public:
-  dctOp(Out out, I in, index_t N) : out_(out), in_(in), N_(N) {}
+  dctOp(Out out, I in, index_t N) : out_(out), in_(in), N_(N) {
+    MATX_LOG_TRACE("dctOp constructor: N={}", N);
+  }
 
   template <typename CapType>
   __MATX_DEVICE__ inline void operator()(index_t idx)
diff --git a/include/matx/operators/dense2sparse.h b/include/matx/operators/dense2sparse.h
index 8224dd09d..ca0ae78e5 100644
--- a/include/matx/operators/dense2sparse.h
+++ b/include/matx/operators/dense2sparse.h
@@ -51,7 +51,9 @@ class Dense2SparseOp : public BaseOp<Dense2SparseOp<OpA>> {
   using tosparse_xform_op = bool;
   using value_type = typename OpA::value_type;
 
-  __MATX_INLINE__ Dense2SparseOp(const OpA &a) : a_(a) {}
+  __MATX_INLINE__ Dense2SparseOp(const OpA &a) : a_(a) {
+    MATX_LOG_TRACE("{} constructor: rank={}", str(), OpA::Rank());
+  }
 
   __MATX_INLINE__ std::string str() const {
     return "dense2sparse(" + get_type_str(a_) + ")";
diff --git a/include/matx/operators/det.h b/include/matx/operators/det.h
index 53f4e6ba5..72724414a 100644
--- a/include/matx/operators/det.h
+++ b/include/matx/operators/det.h
@@ -55,7 +55,9 @@ namespace detail {
       using det_xform_op = bool;
 
       __MATX_INLINE__ std::string str() const { return "det()"; }
-      __MATX_INLINE__ DetOp(const OpA &a) : a_(a) { }
+      __MATX_INLINE__ DetOp(const OpA &a) : a_(a) {
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
+      }
 
       // This should never be called
       template <typename... Is>
diff --git a/include/matx/operators/diag.h b/include/matx/operators/diag.h
index dbff99641..2bbc966ae 100644
--- a/include/matx/operators/diag.h
+++ b/include/matx/operators/diag.h
@@ -61,7 +61,9 @@ namespace matx
 
         __MATX_INLINE__ std::string str() const { return "diag(" + op_.str() + ")"; }
 
-        __MATX_INLINE__ DiagOp(const T1 &op, index_t k) : op_(op), k_(k) { }
+        __MATX_INLINE__ DiagOp(const T1 &op, index_t k) : op_(op), k_(k) {
+          MATX_LOG_TRACE("{} constructor: k={}", str(), k);
+        }
 
         template <typename CapType, typename... Is>
         __MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) operator()(Is... indices) const
diff --git a/include/matx/operators/eig.h b/include/matx/operators/eig.h
index 09e6ebb8d..47ac1e1ed 100644
--- a/include/matx/operators/eig.h
+++ b/include/matx/operators/eig.h
@@ -60,7 +60,9 @@ namespace detail {
       using eig_xform_op = bool;
 
       __MATX_INLINE__ std::string str() const { return "eig()"; }
-      __MATX_INLINE__ EigOp(const OpA &a, EigenMode jobz, SolverFillMode uplo) : a_(a), jobz_(jobz), uplo_(uplo) { };
+      __MATX_INLINE__ EigOp(const OpA &a, EigenMode jobz, SolverFillMode uplo) : a_(a), jobz_(jobz), uplo_(uplo) {
+        MATX_LOG_TRACE("{} constructor: jobz={}, uplo={}", str(), static_cast<int>(jobz), static_cast<int>(uplo)); // enum arguments are traced as their integer values
+      }
 
       // This should never be called
       template <typename... Is>
diff --git a/include/matx/operators/einsum.h b/include/matx/operators/einsum.h
index 0e4761070..ad66de75f 100644
--- a/include/matx/operators/einsum.h
+++ b/include/matx/operators/einsum.h
@@ -58,7 +58,9 @@ namespace detail {
       using einsum_xform_op = bool;
 
       __MATX_INLINE__ std::string str() const { return "einsum()"; }
-      __MATX_INLINE__ EinsumOp(const std::string &subscripts, const OpA&... ops) : subscripts_(subscripts), a_(ops...) { };
+      __MATX_INLINE__ EinsumOp(const std::string &subscripts, const OpA&... ops) : subscripts_(subscripts), a_(ops...) {
+        MATX_LOG_TRACE("{} constructor: subscripts=\"{}\"", str(), subscripts); // the subscript string is traced inside quotes
+      }
 
       // This should never be called
       template <typename... Is>
diff --git a/include/matx/operators/fft.h b/include/matx/operators/fft.h
index 82e43ab63..06c4c7e4a 100644
--- a/include/matx/operators/fft.h
+++ b/include/matx/operators/fft.h
@@ -105,6 +105,7 @@ namespace matx
 
         __MATX_INLINE__ FFTOp(const OpA &a, index_t size, PermDims perm, FFTNorm norm) : 
             a_(a), fft_size_(size),  perm_(perm), norm_(norm) {
+          MATX_LOG_TRACE("{} constructor: fft_size={}, norm={}", str(), size, static_cast<int>(norm));
           for (int r = 0; r < Rank(); r++) {
             out_dims_[r] = a_.Size(r);
           }
@@ -697,6 +698,7 @@ namespace matx
         }
 
         __MATX_INLINE__ FFT2Op(const OpA &a, PermDims perm, FFTNorm norm) : a_(a),  perm_(perm), norm_(norm) {
+          MATX_LOG_TRACE("{} constructor: norm={}", str(), static_cast<int>(norm));
           for (int r = 0; r < Rank(); r++) {
             out_dims_[r] = a_.Size(r);
           }
diff --git a/include/matx/operators/fftshift.h b/include/matx/operators/fftshift.h
index 031aae07a..adf62a38a 100644
--- a/include/matx/operators/fftshift.h
+++ b/include/matx/operators/fftshift.h
@@ -52,6 +52,7 @@ namespace matx
         __MATX_INLINE__ std::string str() const { return "fftshift(" + op_.str() + ")"; }
 
         __MATX_INLINE__ FFTShift1DOp(const T1 &op) : op_(op){
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
           static_assert(Rank() >= 1, "1D FFT shift must have a rank 1 operator or higher");
         };
 
@@ -169,6 +170,7 @@ namespace matx
         __MATX_INLINE__ std::string str() const { return "fftshift(" + op_.str() + ")"; }
 
         __MATX_INLINE__ FFTShift2DOp(const T1 &op) : op_(op){
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
           static_assert(Rank() >= 2, "2D FFT shift must have a rank 2 operator or higher");
         };
 
diff --git a/include/matx/operators/filter.h b/include/matx/operators/filter.h
index c282dfa2a..2af830a3b 100644
--- a/include/matx/operators/filter.h
+++ b/include/matx/operators/filter.h
@@ -65,6 +65,7 @@ namespace detail {
       }
       __MATX_INLINE__ FilterOp(const OpA &a, const cuda::std::array<FilterType, NR> h_rec,
             const cuda::std::array<FilterType, NNR> h_nonrec) : a_(a), h_rec_(h_rec), h_nonrec_(h_nonrec) { 
+        MATX_LOG_TRACE("{} constructor: rank={}, NR={}, NNR={}", str(), Rank(), NR, NNR);
         for (int r = 0; r < Rank(); r++) {
           out_dims_[r] = a_.Size(r);
         }              
diff --git a/include/matx/operators/find.h b/include/matx/operators/find.h
index 09be65d6f..a51b171cf 100644
--- a/include/matx/operators/find.h
+++ b/include/matx/operators/find.h
@@ -56,7 +56,9 @@ namespace detail {
       using find_xform_op = bool;
 
       __MATX_INLINE__ std::string str() const { return "find()"; }
-      __MATX_INLINE__ FindOp(const OpA &a, SelectType sel) : a_(a), sel_(sel) { };
+      __MATX_INLINE__ FindOp(const OpA &a, SelectType sel) : a_(a), sel_(sel) {
+        MATX_LOG_TRACE("{} constructor", str()); // sel is stored in sel_ but not included in the trace
+      }
 
       // This should never be called
       template <typename... Is>
diff --git a/include/matx/operators/find_idx.h b/include/matx/operators/find_idx.h
index fedde45ac..9827b1d79 100644
--- a/include/matx/operators/find_idx.h
+++ b/include/matx/operators/find_idx.h
@@ -56,7 +56,9 @@ namespace detail {
       using find_idx_xform_op = bool;
 
       __MATX_INLINE__ std::string str() const { return "find_idx()"; }
-      __MATX_INLINE__ FindIdxOp(const OpA &a, SelectType sel) : a_(a), sel_(sel) { };
+      __MATX_INLINE__ FindIdxOp(const OpA &a, SelectType sel) : a_(a), sel_(sel) {
+        MATX_LOG_TRACE("{} constructor", str()); // sel is stored in sel_ but not included in the trace
+      }
 
       // This should never be called
       template <typename... Is>
diff --git a/include/matx/operators/find_peaks.h b/include/matx/operators/find_peaks.h
index a15d75f38..5a2d37540 100644
--- a/include/matx/operators/find_peaks.h
+++ b/include/matx/operators/find_peaks.h
@@ -61,6 +61,7 @@ namespace detail {
       __MATX_INLINE__ FindPeaksOp(const OpA &a, value_type height, 
                                                 value_type threshold) : 
                                                 a_(a), height_(height), threshold_(threshold) { 
+        MATX_LOG_TRACE("{} constructor: height={}, threshold={}", str(), height, threshold);
       }
 
       template <typename... Is>
diff --git a/include/matx/operators/flatten.h b/include/matx/operators/flatten.h
index e4b8e21a7..f83f6125f 100644
--- a/include/matx/operators/flatten.h
+++ b/include/matx/operators/flatten.h
@@ -55,6 +55,7 @@ namespace matx
         __MATX_INLINE__ FlattenOp(const T1 &op1) : op1_(op1)
         {
           static_assert(T1::Rank() > 1, "flatten has no effect on tensors of rank 0 and 1");
+          MATX_LOG_TRACE("{} constructor: input_rank={}, output_rank=1", str(), T1::Rank());
         }
 
         template <typename CapType, typename Is>
diff --git a/include/matx/operators/frexp.h b/include/matx/operators/frexp.h
index f1092a4ed..58ce0c3ec 100644
--- a/include/matx/operators/frexp.h
+++ b/include/matx/operators/frexp.h
@@ -51,6 +51,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "frexp()"; }
       __MATX_INLINE__ FrexpOp(const OpA &a) : a_(a) {
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
         static_assert(std::is_floating_point_v<value_type> ||
                       is_cuda_complex_v<value_type>, "frexp() must take a floating point input");
 
diff --git a/include/matx/operators/hermitian.h b/include/matx/operators/hermitian.h
index 4873e2c70..89db0f41c 100644
--- a/include/matx/operators/hermitian.h
+++ b/include/matx/operators/hermitian.h
@@ -57,6 +57,7 @@ namespace matx
 
 	      __MATX_INLINE__ std::string str() const { return "hermitian(" + op_.str() + ")"; }
         __MATX_INLINE__ HermitianTransOp(const T1 &op) : op_(op) {
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
           static_assert(Rank() >= 2, "Hermitian operation needs input with rank >= 2");
         }
 
diff --git a/include/matx/operators/hist.h b/include/matx/operators/hist.h
index a9a6a8886..5dbc22a91 100644
--- a/include/matx/operators/hist.h
+++ b/include/matx/operators/hist.h
@@ -64,6 +64,7 @@ namespace detail {
       __MATX_INLINE__ std::string str() const { return "hist()"; }
       __MATX_INLINE__ HistOp(const OpA &a, typename OpA::value_type lower, typename OpA::value_type upper, int num_levels) : 
           a_(a), lower_(lower), upper_(upper), num_levels_(num_levels) { 
+        MATX_LOG_TRACE("{} constructor: num_levels={}", str(), num_levels);
         for (int r = 0; r < Rank(); r++) {
           out_dims_[r] = a_.Size(r);
         }
diff --git a/include/matx/operators/if.h b/include/matx/operators/if.h
index 44554ff3d..df86ab006 100644
--- a/include/matx/operators/if.h
+++ b/include/matx/operators/if.h
@@ -70,6 +70,7 @@ namespace matx
        */
       __MATX_INLINE__ IFOP(const T1 &cond, const T2 &op) : cond_(cond), op_(op)
       {
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
         static_assert((!is_tensor_view_v<T2>),
             "Only operator emmitters are allowed in IF. Tensor views are "
             "not allowed");
diff --git a/include/matx/operators/ifelse.h b/include/matx/operators/ifelse.h
index e6ce941d8..8a46f48e0 100644
--- a/include/matx/operators/ifelse.h
+++ b/include/matx/operators/ifelse.h
@@ -77,6 +77,7 @@ namespace matx
       __MATX_INLINE__ IFELSEOp(const C1 &cond, const T1 &op1, const T2 &op2) :
                               cond_(cond), op1_(op1), op2_(op2)
     {
+      MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
       static_assert((!is_tensor_view_v<T1> && !is_tensor_view_v<T2>),
           "Only operator emmitters are allowed in IFELSE. Tensor views "
           "are not allowed");
diff --git a/include/matx/operators/index.h b/include/matx/operators/index.h
index a6b07a221..68004af8d 100644
--- a/include/matx/operators/index.h
+++ b/include/matx/operators/index.h
@@ -53,7 +53,9 @@ namespace matx
         using value_type = index_t;
 
         __MATX_INLINE__ std::string str() const { return "index()"; } 
-        __MATX_INLINE__ IndexOp(int dim) : dim_(dim){};  
+        __MATX_INLINE__ IndexOp(int dim) : dim_(dim){
+          MATX_LOG_TRACE("{} constructor: dim={}", str(), dim); // traces the dim argument stored in dim_
+        }
 
         template <typename CapType, typename... Is>
         __MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ auto operator()(Is... indices) const 
diff --git a/include/matx/operators/interleaved.h b/include/matx/operators/interleaved.h
index 94ad969a8..2546b6f34 100644
--- a/include/matx/operators/interleaved.h
+++ b/include/matx/operators/interleaved.h
@@ -55,6 +55,7 @@ namespace matx
         __MATX_INLINE__ std::string str() const { return "interleaved(" + op_.str() + ")"; }
 
         __MATX_INLINE__ ComplexInterleavedOp(const T1 &op) : op_(op) {
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
           static_assert(!is_complex_v<extract_value_type_t<T1>>, "Complex interleaved op only works on scalar input types");
           static_assert(Rank() > 0);
         };
diff --git a/include/matx/operators/interp.h b/include/matx/operators/interp.h
index df4b81022..0ca38f054 100644
--- a/include/matx/operators/interp.h
+++ b/include/matx/operators/interp.h
@@ -397,6 +397,7 @@ namespace matx {
         xq_(xq),
         method_(method)
       {
+        MATX_LOG_TRACE("{} constructor: method={}", str(), static_cast<int>(method));
         if (x_.Size(x_.Rank() - 1) != v_.Size(v_.Rank() - 1)) {
           MATX_THROW(matxInvalidSize, "interp1: sample points and values must have the same size in the last dimension");
         }
diff --git a/include/matx/operators/inverse.h b/include/matx/operators/inverse.h
index 813af389d..d09f1ad06 100644
--- a/include/matx/operators/inverse.h
+++ b/include/matx/operators/inverse.h
@@ -57,7 +57,9 @@ namespace detail {
       using inv_xform_op = bool;
 
       __MATX_INLINE__ std::string str() const { return "inv()"; }
-      __MATX_INLINE__ InvOp(const OpA &a) : a_(a) {};
+      __MATX_INLINE__ InvOp(const OpA &a) : a_(a) {
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); // traces the value returned by Rank()
+      }
 
 
       template <typename CapType, typename... Is>
diff --git a/include/matx/operators/isclose.h b/include/matx/operators/isclose.h
index 60e79fb4e..669239629 100644
--- a/include/matx/operators/isclose.h
+++ b/include/matx/operators/isclose.h
@@ -55,6 +55,7 @@ namespace matx
         __MATX_INLINE__ IsCloseOp(const Op1 &op1, const Op2 &op2, double rtol, double atol) :
           op1_(op1), op2_(op2), rtol_(static_cast<inner_type>(rtol)), atol_(static_cast<inner_type>(atol))
         {
+          MATX_LOG_TRACE("{} constructor: rtol={}, atol={}", str(), rtol, atol);
           static_assert(Op1::Rank() == Op2::Rank(), "Operator ranks must match in isclose()");
           MATX_ASSERT_COMPATIBLE_OP_SIZES(op1);
           MATX_ASSERT_COMPATIBLE_OP_SIZES(op2);
diff --git a/include/matx/operators/kronecker.h b/include/matx/operators/kronecker.h
index 7389c6a2e..ad17e9263 100644
--- a/include/matx/operators/kronecker.h
+++ b/include/matx/operators/kronecker.h
@@ -61,6 +61,7 @@ namespace matx
 
         __MATX_INLINE__ KronOp(const T1 &op1, const T2 &op2) : op1_(op1), op2_(op2)
         {
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
           static_assert(RankGTE(Rank(), 2), "Kronecker product must be used on tensors with rank 2 or higher");
         }        
 
diff --git a/include/matx/operators/legendre.h b/include/matx/operators/legendre.h
index 2f8bb611c..d07371f27 100644
--- a/include/matx/operators/legendre.h
+++ b/include/matx/operators/legendre.h
@@ -95,6 +95,7 @@ namespace matx
         __MATX_INLINE__ std::string str() const { return "legendre(" + get_type_str(n_) + "," + get_type_str(m_) + "," + get_type_str(in_) + ")"; }
 
         __MATX_INLINE__ LegendreOp(const T1 &n, const T2 &m, const T3 &in, cuda::std::array<int,2> axis) : n_(n), m_(m), in_(in), axis_(axis) {
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
           static_assert(get_rank<T1>() <= 1, "legendre op:  n must be a scalar, rank 0 or 1 operator");
           static_assert(get_rank<T2>() <= 1, "legendre op:  m must be a scalar, rank 0 or 1 operator");
         }
diff --git a/include/matx/operators/lu.h b/include/matx/operators/lu.h
index 5a5fee112..54ce65fcc 100644
--- a/include/matx/operators/lu.h
+++ b/include/matx/operators/lu.h
@@ -55,7 +55,9 @@ namespace detail {
       using lu_xform_op = bool;
 
       __MATX_INLINE__ std::string str() const { return "lu()"; }
-      __MATX_INLINE__ LUOp(const OpA &a) : a_(a) { };
+      __MATX_INLINE__ LUOp(const OpA &a) : a_(a) {
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); // traces the value returned by Rank()
+      }
 
       // This should never be called
       template <typename... Is>
diff --git a/include/matx/operators/matmul.h b/include/matx/operators/matmul.h
index e72ec621e..a341840e9 100644
--- a/include/matx/operators/matmul.h
+++ b/include/matx/operators/matmul.h
@@ -73,6 +73,7 @@ namespace matx
 
         __MATX_INLINE__ MatMulOp(const OpA &a, const OpB &b, float alpha, float beta, PermDims perm) : 
               a_(a), b_(b), alpha_(alpha), beta_(beta), perm_(perm) {
+          MATX_LOG_TRACE("{} constructor: alpha={}, beta={}", str(), alpha, beta);
           if constexpr (!std::is_same_v<PermDims, no_permute_t>) {
             for (int r = 0; r < Rank(); r++) {
               if (r == Rank() - 2) {
diff --git a/include/matx/operators/matvec.h b/include/matx/operators/matvec.h
index 288ee8c16..692e0e231 100644
--- a/include/matx/operators/matvec.h
+++ b/include/matx/operators/matvec.h
@@ -67,7 +67,7 @@ namespace matx
 
         __MATX_INLINE__ MatVecOp(const OpA &A, const OpB &B, float alpha, float beta) : 
               a_(A), b_(B), alpha_(alpha), beta_(beta) {
-          
+          MATX_LOG_TRACE("{} constructor: alpha={}, beta={}", str(), alpha, beta);
           for (int r = 0; r < Rank(); r++) {
             out_dims_[r] = a_.Size(r);
           }
diff --git a/include/matx/operators/max.h b/include/matx/operators/max.h
index 3a8fcdb43..415c01459 100644
--- a/include/matx/operators/max.h
+++ b/include/matx/operators/max.h
@@ -60,6 +60,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "max(" + get_type_str(a_) + ")"; }
       __MATX_INLINE__ MaxOp(const OpA &a) : a_(a) { 
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
         for (int r = 0; r < ORank; r++) {
           out_dims_[r] = a_.Size(r);
         }        
diff --git a/include/matx/operators/mean.h b/include/matx/operators/mean.h
index eb1ae6ff6..b928d4230 100644
--- a/include/matx/operators/mean.h
+++ b/include/matx/operators/mean.h
@@ -60,6 +60,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "mean(" + get_type_str(a_) + ")"; }
       __MATX_INLINE__ MeanOp(const OpA &a) : a_(a) { 
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
         for (int r = 0; r < ORank; r++) {
           out_dims_[r] = a_.Size(r);
         }            
diff --git a/include/matx/operators/median.h b/include/matx/operators/median.h
index e48709b60..5e7cc24dd 100644
--- a/include/matx/operators/median.h
+++ b/include/matx/operators/median.h
@@ -60,6 +60,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "median(" + get_type_str(a_) + ")"; }
       __MATX_INLINE__ MedianOp(const OpA &a) : a_(a) { 
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
         for (int r = 0; r < ORank; r++) {
           out_dims_[r] = a_.Size(r);
         }            
diff --git a/include/matx/operators/min.h b/include/matx/operators/min.h
index ae6b48557..b4fe1fe15 100644
--- a/include/matx/operators/min.h
+++ b/include/matx/operators/min.h
@@ -60,6 +60,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "min(" + get_type_str(a_) + ")"; }
       __MATX_INLINE__ MinOp(const OpA &a) : a_(a) { 
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
         for (int r = 0; r < ORank; r++) {
           out_dims_[r] = a_.Size(r);
         }        
diff --git a/include/matx/operators/norm.h b/include/matx/operators/norm.h
index c85e0b00c..351e2d803 100644
--- a/include/matx/operators/norm.h
+++ b/include/matx/operators/norm.h
@@ -70,6 +70,7 @@ namespace matx
       }
 
       __MATX_INLINE__ NormOp(const OpA &op, NormOrder order) : a_(op), order_(order) {
+        MATX_LOG_TRACE("{} constructor: order={}", str(), static_cast<int>(order));
         if constexpr (std::is_same_v<NormType, detail::NormTypeVector>) {
           MATX_ASSERT_STR(order == NormOrder::NONE || order == NormOrder::L1 || order == NormOrder::L2, matxInvalidParameter,
             "Invalid norm order used for vector mode");
diff --git a/include/matx/operators/normalize.h b/include/matx/operators/normalize.h
index d4de70556..4db08cf7d 100644
--- a/include/matx/operators/normalize.h
+++ b/include/matx/operators/normalize.h
@@ -72,10 +72,12 @@ namespace matx
         using self_type = NormalizeOp<OpA, DIM>;
 
         __MATX_INLINE__ NormalizeOp(const OpA &op, const NORMALIZE_RANGE method): op_(op), normalize_method(method) {
+          MATX_LOG_TRACE("{} constructor: method={}", str(), static_cast<int>(method));
           InitNormalize();
         }
 
         __MATX_INLINE__ NormalizeOp(const OpA &op, const NORMALIZE_RANGE method, const float p): op_(op), normalize_method(method),  p_(p){
+          MATX_LOG_TRACE("{} constructor: method={}, p={}", str(), static_cast<int>(method), p);
           MATX_ASSERT_STR(normalize_method == NORMALIZE_RANGE::NORM, matxInvalidParameter, "p value can be specified for only p-norm");
           InitNormalize();
         }
diff --git a/include/matx/operators/outer.h b/include/matx/operators/outer.h
index 958adc4e3..e8b4a1411 100644
--- a/include/matx/operators/outer.h
+++ b/include/matx/operators/outer.h
@@ -67,7 +67,7 @@ namespace matx
 
         __MATX_INLINE__ OuterOp(const OpA &A, const OpB &B, float alpha, float beta) : 
               a_(A), b_(B), alpha_(alpha), beta_(beta) {
-
+          MATX_LOG_TRACE("{} constructor: alpha={}, beta={}", str(), alpha, beta);
           out_dims_[RANK - 1] = b_.Size(OpB::Rank() - 1);
           out_dims_[RANK - 2] = a_.Size(OpA::Rank() - 1);
           if constexpr (remove_cvref_t<OpA>::Rank() >= remove_cvref_t<OpB>::Rank()) {
diff --git a/include/matx/operators/overlap.h b/include/matx/operators/overlap.h
index 0ca7df566..5dc40510a 100644
--- a/include/matx/operators/overlap.h
+++ b/include/matx/operators/overlap.h
@@ -65,7 +65,7 @@ namespace matx
         __MATX_INLINE__ std::string str() const { return "overlap(" + op_.str() + ")"; }
         __MATX_INLINE__ OverlapOp(const T &op, const cuda::std::array<shape_type, DIM> &windows,
                                       const cuda::std::array<shape_type, DIM> &strides) : op_(op) {
-
+          MATX_LOG_TRACE("{} constructor: dim={}, rank={}", str(), DIM, Rank());
           // This only works for 1D tensors going to 2D at the moment. Generalize to
           // higher dims later
           index_t window_size = windows[0];
diff --git a/include/matx/operators/pad.h b/include/matx/operators/pad.h
index df7687754..caf9ebcad 100644
--- a/include/matx/operators/pad.h
+++ b/include/matx/operators/pad.h
@@ -81,6 +81,7 @@ namespace matx
       __MATX_INLINE__ PadOp(const T& op, int axis, const PadSizeType& pad_sizes, const value_type& pad_value, PadMode mode = MATX_PAD_MODE_CONSTANT) 
         : op_(op), axis_(axis), pad_value_(pad_value), mode_(mode)
       {
+        MATX_LOG_TRACE("{} constructor: axis={}, mode={}", str(), axis, static_cast<int>(mode));
         static_assert(RANK > 0, "Cannot pad rank-0 tensors");
         MATX_ASSERT_STR(axis >= 0 && axis < RANK, matxInvalidDim, "pad axis must be >= 0 and less than the rank of the operator");
         MATX_ASSERT_STR(pad_sizes.size() == 2, matxInvalidParameter, "pad_sizes must contain exactly 2 elements [before, after]");
diff --git a/include/matx/operators/percentile.h b/include/matx/operators/percentile.h
index c8b3ecc56..353b91086 100644
--- a/include/matx/operators/percentile.h
+++ b/include/matx/operators/percentile.h
@@ -61,6 +61,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "percentile(" + get_type_str(a_) + ")"; }
       __MATX_INLINE__ PercentileOp(const OpA &a, unsigned char q, PercentileMethod method) : a_(a), q_(q), method_(method) {
+        MATX_LOG_TRACE("{} constructor: q={}, method={}", str(), static_cast<int>(q), static_cast<int>(method));
         for (int r = 0; r < ORank; r++) {
           out_dims_[r]    = (r == ORank - 1) ? 1 : a_.Size(r);
         }
diff --git a/include/matx/operators/permute.h b/include/matx/operators/permute.h
index f7c151cce..3e408c473 100644
--- a/include/matx/operators/permute.h
+++ b/include/matx/operators/permute.h
@@ -74,6 +74,7 @@ namespace matx
 
             dims_[i] = dims[i];
           }
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
         }
 
         template <typename CapType, typename Op, typename Dims, typename... Is>
diff --git a/include/matx/operators/pinv.h b/include/matx/operators/pinv.h
index a109a30ea..dde0cf124 100644
--- a/include/matx/operators/pinv.h
+++ b/include/matx/operators/pinv.h
@@ -59,6 +59,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "pinv()"; }
       __MATX_INLINE__ PinvOp(const OpA &a, float rcond) : a_(a), rcond_(rcond) {
+        MATX_LOG_TRACE("{} constructor: rcond={}", str(), rcond);
         for (int r = 0; r < Rank(); r++) {
           if (r >= Rank() - 2) {
             out_dims_[r] = (r == Rank() - 1) ? a_.Size(Rank() - 2) : a_.Size(Rank() - 1);
diff --git a/include/matx/operators/planar.h b/include/matx/operators/planar.h
index 78998a878..7dccefbf7 100644
--- a/include/matx/operators/planar.h
+++ b/include/matx/operators/planar.h
@@ -54,6 +54,7 @@ namespace matx
         __MATX_INLINE__ ComplexPlanarOp(const T1 &op) : op_(op) {
           static_assert(is_complex_v<extract_value_type_t<T1>>, "Complex planar op only works on complex types");
           static_assert(Rank() > 0);
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
         };
 
         template <typename CapType, typename... Is>
diff --git a/include/matx/operators/polyval.h b/include/matx/operators/polyval.h
index b028f2481..34760ce51 100644
--- a/include/matx/operators/polyval.h
+++ b/include/matx/operators/polyval.h
@@ -56,6 +56,7 @@ namespace matx
 
         __MATX_INLINE__ std::string str() const { return "polyval()"; }
         __MATX_INLINE__ PolyvalOp(const Op &op, const Coeffs &coeffs) : op_(op), coeffs_(coeffs) {
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
           MATX_STATIC_ASSERT_STR(Coeffs::Rank() == 1, matxInvalidDim, "Coefficient must be rank 1");
           MATX_STATIC_ASSERT_STR(Op::Rank() == 1, matxInvalidDim, "Input operator must be rank 1");
         };
diff --git a/include/matx/operators/prod.h b/include/matx/operators/prod.h
index 7a9f41c71..6ac580de1 100644
--- a/include/matx/operators/prod.h
+++ b/include/matx/operators/prod.h
@@ -60,6 +60,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "prod(" + get_type_str(a_) + ")"; }
       __MATX_INLINE__ ProdOp(const OpA &a) : a_(a) { 
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
         for (int r = 0; r < ORank; r++) {
           out_dims_[r] = a_.Size(r);
         }                    
diff --git a/include/matx/operators/pwelch.h b/include/matx/operators/pwelch.h
index 44bbea818..5ec291bdc 100644
--- a/include/matx/operators/pwelch.h
+++ b/include/matx/operators/pwelch.h
@@ -72,6 +72,7 @@ namespace matx
               output_scale_mode_(output_scale_mode),
               fs_(fs)
         {
+          MATX_LOG_TRACE("{} constructor: nperseg={}, noverlap={}, nfft={}, fs={}", str(), nperseg, noverlap, nfft, fs);
           MATX_STATIC_ASSERT_STR(OpX::Rank() == 1, matxInvalidDim, "pwelch:  Only input rank of 1 is supported presently");
           for (int r = 0; r < OpX::Rank(); r++) {
             out_dims_[r] = nfft_;
diff --git a/include/matx/operators/qr.h b/include/matx/operators/qr.h
index 69e1b61f3..ad3d44903 100644
--- a/include/matx/operators/qr.h
+++ b/include/matx/operators/qr.h
@@ -56,7 +56,9 @@ namespace detail {
       using qr_xform_op = bool;
 
       __MATX_INLINE__ std::string str() const { return "qr(" + get_type_str(a_) + ")"; }
-      __MATX_INLINE__ QROp(const OpA &a) : a_(a) { };
+      __MATX_INLINE__ QROp(const OpA &a) : a_(a) {
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); // operator name + rank
+      }
 
       // This should never be called
       template <typename... Is>
@@ -131,7 +133,9 @@ namespace detail {
       using qr_solver_xform_op = bool;
 
       __MATX_INLINE__ std::string str() const { return "qr_solver()"; }
-      __MATX_INLINE__ SolverQROp(const OpA &a) : a_(a) { }    
+      __MATX_INLINE__ SolverQROp(const OpA &a) : a_(a) {
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); // str() is the fixed "qr_solver()", so only kind + rank are traced
+      }
 
       // This should never be called
       template <typename... Is>
diff --git a/include/matx/operators/r2c.h b/include/matx/operators/r2c.h
index 5d4cbc698..8cd185f1a 100644
--- a/include/matx/operators/r2c.h
+++ b/include/matx/operators/r2c.h
@@ -53,6 +53,7 @@ namespace matx
         __MATX_INLINE__ std::string str() const { return "r2c(" + op_.str() + ")"; }
 
         __MATX_INLINE__ R2COp(const T1 &op, index_t orig) : op_(op), orig_size_(orig) {
+          MATX_LOG_TRACE("{} constructor: rank={}, orig_size={}", str(), Rank(), orig);
           static_assert(Rank() >= 1, "R2COp must have a rank 1 operator or higher");
         };
 
diff --git a/include/matx/operators/reduce.h b/include/matx/operators/reduce.h
index 0b5e32db4..1fd835f3a 100644
--- a/include/matx/operators/reduce.h
+++ b/include/matx/operators/reduce.h
@@ -66,6 +66,7 @@ namespace matx
 
         __MATX_INLINE__ ReduceOp(const OpA &A, PermDims perm, ReductionOp rop, bool init) : 
               a_(A), perm_(perm), reduction_op_(rop), init_(init) {
+          MATX_LOG_TRACE("{} constructor: rop={}, init={}", str(), static_cast<int>(rop), init);
           for (int r = 0; r < ORank; r++) {
             out_dims_[r] = a_.Size(r);
           }
diff --git a/include/matx/operators/remap.h b/include/matx/operators/remap.h
index 814fadbf0..ea7f4dfba 100644
--- a/include/matx/operators/remap.h
+++ b/include/matx/operators/remap.h
@@ -63,7 +63,9 @@ namespace matx
 
         __MATX_INLINE__ std::string str() const { return "remap(" + op_.str() + ")"; }
 
-	      __MATX_INLINE__ RemapOp(const T &op, IdxType idx) : op_(op), idx_(idx) {};
+        __MATX_INLINE__ RemapOp(const T &op, IdxType idx) : op_(op), idx_(idx) {
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); // operator name + rank
+        }
 
         // Only supports one element per thread
         template <typename CapType, typename Op, typename Idx, typename... Is>
diff --git a/include/matx/operators/repmat.h b/include/matx/operators/repmat.h
index af6d15985..0ea2db446 100644
--- a/include/matx/operators/repmat.h
+++ b/include/matx/operators/repmat.h
@@ -67,6 +67,7 @@ namespace matx
           {
             reps_[dim] = reps;
           }
+          MATX_LOG_TRACE("{} constructor: rank={}, reps={}", str(), DIM, reps);
         }
 
         __MATX_INLINE__ RepMatOp(const T1 &op, const cuda::std::array<index_t, DIM> reps) : op_(op)
@@ -75,6 +76,7 @@ namespace matx
           {
             reps_[dim] = reps[dim];
           }
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), DIM);
         }
 
         __MATX_INLINE__ RepMatOp(const T1 &op, const index_t *reps) : op_(op)
diff --git a/include/matx/operators/resample_poly.h b/include/matx/operators/resample_poly.h
index 12e5aa567..752a57d65 100644
--- a/include/matx/operators/resample_poly.h
+++ b/include/matx/operators/resample_poly.h
@@ -66,7 +66,8 @@ namespace detail {
       __MATX_INLINE__ std::string str() const { return "resample_poly(" + get_type_str(a_) + "," + get_type_str(f_) + ")";}
       __MATX_INLINE__ ResamplePolyOp(const OpA &a, const FilterType &f, index_t up, index_t down) : 
           a_(a), f_(f), up_(up), down_(down) 
-      { 
+      {
+        MATX_LOG_TRACE("{} constructor: up={}, down={}", str(), up, down); 
         const index_t up_len = a_.Size(OpA::Rank() - 1) * up_;
         const index_t b_len = up_len / down_ + ((up_len % down_) ? 1 : 0);
 
diff --git a/include/matx/operators/reshape.h b/include/matx/operators/reshape.h
index f470bf2a8..073ae45ad 100644
--- a/include/matx/operators/reshape.h
+++ b/include/matx/operators/reshape.h
@@ -77,6 +77,7 @@ namespace matx
           }
 
           MATX_ASSERT_STR(size == TotalSize(op_), matxInvalidSize, "ReshapeOp: TotalSize of reshape must match");
+          MATX_LOG_TRACE("{} constructor: rank={}, total_size={}", str(), Rank(), size);
         };
 
         template <typename CapType, typename Op, typename... Is>
diff --git a/include/matx/operators/reverse.h b/include/matx/operators/reverse.h
index 6d27a15e9..1fc30e96f 100644
--- a/include/matx/operators/reverse.h
+++ b/include/matx/operators/reverse.h
@@ -61,7 +61,9 @@ namespace matx
 
         __MATX_INLINE__ std::string str() const { return "reverse(" + op_.str() + ")"; }
 
-        __MATX_INLINE__ ReverseOp(const T1 &op) : op_(op){};
+        __MATX_INLINE__ ReverseOp(const T1 &op) : op_(op) {
+          MATX_LOG_TRACE("{} constructor: dim={}, rank={}", str(), DIM, Rank()); // DIM is presumably the reversed axis (as in ShiftOp), not the rank - confirm
+        }
 
         template <typename CapType, typename Op, typename... Is>
         static __MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) get_impl(Op&& op, Is... indices)
diff --git a/include/matx/operators/select.h b/include/matx/operators/select.h
index 9e7f834aa..a26e883cb 100644
--- a/include/matx/operators/select.h
+++ b/include/matx/operators/select.h
@@ -57,7 +57,9 @@ namespace matx
 
         __MATX_INLINE__ std::string str() const { return "select(" + op_.str() + ")"; }
 
-        __MATX_INLINE__ SelectOp(const T &op, IdxType idx) : op_(op), idx_(idx) {};  
+        __MATX_INLINE__ SelectOp(const T &op, IdxType idx) : op_(op), idx_(idx) {
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); // operator name + rank
+        }
 
         template <typename CapType, typename Op, typename Idx, typename... Is>
         static __MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) get_impl(Op&& op, const Idx &idx, index_t i)
diff --git a/include/matx/operators/self.h b/include/matx/operators/self.h
index 3bf6ed28c..6d2cbddf3 100644
--- a/include/matx/operators/self.h
+++ b/include/matx/operators/self.h
@@ -57,7 +57,9 @@ namespace matx
         
         __MATX_INLINE__ std::string str() const { return "self(" + op_.str() + ")"; }
         
-	      __MATX_INLINE__ SelfOp(const T1 &op) : op_(op) {}
+        __MATX_INLINE__ SelfOp(const T1 &op) : op_(op) {
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); // operator name + rank
+        }
 
         template <typename CapType, typename... Is>
         __MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) operator()(Is... indices) const 
diff --git a/include/matx/operators/set.h b/include/matx/operators/set.h
index 084917adc..9d096ed41 100644
--- a/include/matx/operators/set.h
+++ b/include/matx/operators/set.h
@@ -98,6 +98,7 @@ class set : public BaseOp<set<T, Op>> {
    */
   inline set(T &out, const Op &op) : out_(out), op_(op)
   {
+    MATX_LOG_TRACE("{} constructor: rank={}", str(), T::Rank());
     static_assert(is_matx_op_lvalue<T>() == true, "Invalid operator on LHS of set/operator=");
     static_assert(!is_matx_transform_op<T>(), "Cannot use transform operator on LHS of assignment");
 
diff --git a/include/matx/operators/shift.h b/include/matx/operators/shift.h
index b19ccf443..5132d6437 100644
--- a/include/matx/operators/shift.h
+++ b/include/matx/operators/shift.h
@@ -63,6 +63,7 @@ namespace matx
 
         __MATX_INLINE__ ShiftOp(const T1 &op, T2 shift) : op_(op), shift_(shift)
         {
+          MATX_LOG_TRACE("{} constructor: dim={}, rank={}", str(), DIM, Rank());
           static_assert(DIM < Rank(), "Dimension to shift must be less than rank of tensor");
 
           MATX_LOOP_UNROLL
diff --git a/include/matx/operators/sign.h b/include/matx/operators/sign.h
index 6372d13b6..7371811b7 100644
--- a/include/matx/operators/sign.h
+++ b/include/matx/operators/sign.h
@@ -56,7 +56,9 @@ namespace matx
         value_type zval_;
 
         __MATX_INLINE__ std::string str() const { return "sign(" + get_type_str(op_) + ")"; }
-        __MATX_INLINE__ SignOp(const T &op, value_type zval) : op_(op), zval_(zval) {};
+        __MATX_INLINE__ SignOp(const T &op, value_type zval) : op_(op), zval_(zval) {
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); // zval is not included in the trace
+        }
 
         template <typename CapType, typename... Is>
         __MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ auto operator()(Is... indices) const 
diff --git a/include/matx/operators/slice.h b/include/matx/operators/slice.h
index 6a62ac565..598f0d9db 100644
--- a/include/matx/operators/slice.h
+++ b/include/matx/operators/slice.h
@@ -108,6 +108,7 @@ namespace matx
             }
           }
           MATX_ASSERT_STR(d==Rank(), matxInvalidDim, "SliceOp: Number of dimensions without matxDropDim must equal new rank.");
+          MATX_LOG_TRACE("{} constructor: input_rank={}, output_rank={}", str(), T::Rank(), DIM);
         };
 
         template <typename CapType, typename Op, typename... Is>
diff --git a/include/matx/operators/softmax.h b/include/matx/operators/softmax.h
index eacff1b15..a239b7868 100644
--- a/include/matx/operators/softmax.h
+++ b/include/matx/operators/softmax.h
@@ -63,6 +63,7 @@ namespace matx
 
         __MATX_INLINE__ SoftmaxOp(const OpA &A, PermDims perm) : 
               a_(A), perm_(perm) {
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
           for (int r = 0; r < OpA::Rank(); r++) {
             out_dims_[r] = a_.Size(r);
           }          
diff --git a/include/matx/operators/solve.h b/include/matx/operators/solve.h
index 0bc0abe1c..0466c7cd3 100644
--- a/include/matx/operators/solve.h
+++ b/include/matx/operators/solve.h
@@ -62,6 +62,7 @@ class SolveOp : public BaseOp<SolveOp<OpA, OpB>> {
   using value_type = typename OpA::value_type;
 
   __MATX_INLINE__ SolveOp(const OpA &a, const OpB &b) : a_(a), b_(b) {
+    MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
     for (int r = 0, rank = Rank(); r < rank; r++) {
       out_dims_[r] = b_.Size(r);
     }
diff --git a/include/matx/operators/sort.h b/include/matx/operators/sort.h
index c76f2cd60..e80545f05 100644
--- a/include/matx/operators/sort.h
+++ b/include/matx/operators/sort.h
@@ -66,6 +66,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "sort()"; }
       __MATX_INLINE__ SortOp(const OpA &a, SortDirection_t dir) : a_(a), dir_(dir) { 
+        MATX_LOG_TRACE("{} constructor: rank={}, dir={}", str(), Rank(), static_cast<int>(dir));
         for (int r = 0; r < Rank(); r++) {
           out_dims_[r] = a_.Size(r);
         }
diff --git a/include/matx/operators/sparse2dense.h b/include/matx/operators/sparse2dense.h
index 6f3e3dc31..c4715d450 100644
--- a/include/matx/operators/sparse2dense.h
+++ b/include/matx/operators/sparse2dense.h
@@ -58,6 +58,7 @@ class Sparse2DenseOp : public BaseOp<Sparse2DenseOp<OpA>> {
   using value_type = typename OpA::value_type;
 
   __MATX_INLINE__ Sparse2DenseOp(const OpA &a) : a_(a) {
+    MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
     for (int r = 0; r < Rank(); r++) {
       out_dims_[r] = a_.Size(r);
     }
diff --git a/include/matx/operators/sparse2sparse.h b/include/matx/operators/sparse2sparse.h
index 1f4895564..a05286fbf 100644
--- a/include/matx/operators/sparse2sparse.h
+++ b/include/matx/operators/sparse2sparse.h
@@ -51,7 +51,9 @@ class Sparse2SparseOp : public BaseOp<Sparse2SparseOp<OpA>> {
   using tosparse_xform_op = bool;
   using value_type = typename OpA::value_type;
 
-  __MATX_INLINE__ Sparse2SparseOp(const OpA &a) : a_(a) {}
+  __MATX_INLINE__ Sparse2SparseOp(const OpA &a) : a_(a) {
+    MATX_LOG_TRACE("{} constructor: rank={}", str(), OpA::Rank()); // traces the input operator's rank
+  }
 
   __MATX_INLINE__ std::string str() const {
     return "sparse2sparse(" + get_type_str(a_) + ")";
diff --git a/include/matx/operators/sph2cart.h b/include/matx/operators/sph2cart.h
index 2b320df06..3222d8c80 100644
--- a/include/matx/operators/sph2cart.h
+++ b/include/matx/operators/sph2cart.h
@@ -59,6 +59,7 @@ namespace matx
 
         __MATX_INLINE__ Sph2CartOp(const T1 &theta, const T2 &phi, const T3 &r) : theta_(theta), phi_(phi), r_(r)
       {
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
         MATX_ASSERT_COMPATIBLE_OP_SIZES(theta);
         MATX_ASSERT_COMPATIBLE_OP_SIZES(phi);
         MATX_ASSERT_COMPATIBLE_OP_SIZES(r);
diff --git a/include/matx/operators/stack.h b/include/matx/operators/stack.h
index 05d23a35e..90b392fe4 100644
--- a/include/matx/operators/stack.h
+++ b/include/matx/operators/stack.h
@@ -75,6 +75,7 @@ namespace matx
 
       __MATX_INLINE__ StackOp(int axis, const Ts&... ts) : ops_(ts...), axis_(axis)
       {
+        MATX_LOG_TRACE("{} constructor: axis={}, num_tensors={}", str(), axis, sizeof...(Ts));
         static_assert(sizeof...(Ts) > 1, "Must have more than one tensor to stack");
         static_assert((... && (RANK == Ts::Rank())), "stacked ops must have the same rank");
 
diff --git a/include/matx/operators/stdd.h b/include/matx/operators/stdd.h
index 5cf7887a6..6d93aef64 100644
--- a/include/matx/operators/stdd.h
+++ b/include/matx/operators/stdd.h
@@ -61,6 +61,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "stdd(" + get_type_str(a_) + ")"; }
       __MATX_INLINE__ StddOp(const OpA &a, int ddof) : a_(a),  ddof_(ddof) { 
+        MATX_LOG_TRACE("{} constructor: rank={}, ddof={}", str(), Rank(), ddof);
         for (int r = 0; r < ORank; r++) {
           out_dims_[r] = a_.Size(r);
         }
diff --git a/include/matx/operators/sum.h b/include/matx/operators/sum.h
index 79673e07f..6b0f7448b 100644
--- a/include/matx/operators/sum.h
+++ b/include/matx/operators/sum.h
@@ -63,6 +63,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "sum(" + get_type_str(a_) + ")"; }
       __MATX_INLINE__ SumOp(const OpA &a) : a_(a) { 
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
         for (int r = 0; r < ORank; r++) {
           out_dims_[r] = a_.Size(r);
         }
diff --git a/include/matx/operators/svd.h b/include/matx/operators/svd.h
index 6d23201ca..3edc0b466 100644
--- a/include/matx/operators/svd.h
+++ b/include/matx/operators/svd.h
@@ -58,7 +58,9 @@ namespace detail {
       using svd_xform_op = bool;
 
       __MATX_INLINE__ std::string str() const { return "svd(" + get_type_str(a_) + ")"; }
-      __MATX_INLINE__ SVDOp(const OpA &a, const SVDMode jobz, const SVDHostAlgo algo) : a_(a), jobz_(jobz), algo_(algo) { };
+      __MATX_INLINE__ SVDOp(const OpA &a, const SVDMode jobz, const SVDHostAlgo algo) : a_(a), jobz_(jobz), algo_(algo) {
+        MATX_LOG_TRACE("{} constructor: jobz={}, algo={}", str(), static_cast<int>(jobz), static_cast<int>(algo)); // enums traced as ints
+      }
 
       // This should never be called
       template <typename... Is>
@@ -167,8 +169,9 @@ namespace detail {
       using svd_xform_op = bool;
 
       __MATX_INLINE__ std::string str() const { return "svdpi(" + get_type_str(a_) + ")"; }
-      __MATX_INLINE__ SVDPIOp(const OpA &a, const OpX &x, int iterations, index_t k) : a_(a), x_(x), iterations_(iterations), k_(k) 
-      { }
+      __MATX_INLINE__ SVDPIOp(const OpA &a, const OpX &x, int iterations, index_t k) : a_(a), x_(x), iterations_(iterations), k_(k) {
+        MATX_LOG_TRACE("{} constructor: iterations={}, k={}", str(), iterations, k);
+      }
 
       // This should never be called
       template <typename... Is>
diff --git a/include/matx/operators/toeplitz.h b/include/matx/operators/toeplitz.h
index 319ca7892..daf49e15e 100644
--- a/include/matx/operators/toeplitz.h
+++ b/include/matx/operators/toeplitz.h
@@ -76,6 +76,7 @@ namespace matx
 
         __MATX_INLINE__ ToeplitzOp(const T1 &op1, const T2 &op2) : op1_(op1), op2_(op2)
       {
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
         if constexpr (is_matx_op<T1>()) {
           static_assert(T1::Rank() == 1, "toeplitz() operator input rank must be 1");
         }
diff --git a/include/matx/operators/trace.h b/include/matx/operators/trace.h
index 89571ada1..417038ff2 100644
--- a/include/matx/operators/trace.h
+++ b/include/matx/operators/trace.h
@@ -58,7 +58,9 @@ namespace detail {
       using trace_xform_op = bool;
 
       __MATX_INLINE__ std::string str() const { return "trace()"; }
-      __MATX_INLINE__ TraceOp(const OpA &a) : a_(a) {}
+      __MATX_INLINE__ TraceOp(const OpA &a) : a_(a) {
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); // str() is the fixed "trace()", so only kind + rank are traced
+      }
 
       __MATX_HOST__ __MATX_INLINE__ auto Data() const noexcept { return ptr; }
 
diff --git a/include/matx/operators/transpose.h b/include/matx/operators/transpose.h
index 1f09ccbb8..d73b100f1 100644
--- a/include/matx/operators/transpose.h
+++ b/include/matx/operators/transpose.h
@@ -68,7 +68,8 @@ namespace detail {
           else {
             out_dims_[r] = a_.Size(r);
           }
-        }        
+        }
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
       }
 
       template <typename CapType, typename... Is>
diff --git a/include/matx/operators/unary_operators.h b/include/matx/operators/unary_operators.h
index 29a43b04e..100a81968 100644
--- a/include/matx/operators/unary_operators.h
+++ b/include/matx/operators/unary_operators.h
@@ -73,6 +73,7 @@ namespace matx
     }
 
     __MATX_INLINE__ matxUnaryOp(const I1 &in1, const Op &op) : in1_(in1), op_(op) {
+      MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
       if constexpr (Rank() > 0) {
         for (int32_t i = 0; i < Rank(); i++) {
           size_[i] = get_size(in1_, i);
diff --git a/include/matx/operators/unique.h b/include/matx/operators/unique.h
index 6c88f0fc7..9296190df 100644
--- a/include/matx/operators/unique.h
+++ b/include/matx/operators/unique.h
@@ -55,7 +55,9 @@ namespace detail {
       using unique_xform_op = bool;
 
       __MATX_INLINE__ std::string str() const { return "unique()"; }
-      __MATX_INLINE__ UniqueOp(const OpA &a) : a_(a) { };
+      __MATX_INLINE__ UniqueOp(const OpA &a) : a_(a) {
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank()); // operator name + rank
+      }
 
       // This should never be called
       template <typename... Is>
diff --git a/include/matx/operators/updownsample.h b/include/matx/operators/updownsample.h
index 3d078c198..71a2428d7 100644
--- a/include/matx/operators/updownsample.h
+++ b/include/matx/operators/updownsample.h
@@ -66,6 +66,7 @@ namespace matx
         __MATX_INLINE__ std::string str() const { return "upsample(" + op_.str() + ")"; }
 
         __MATX_INLINE__ UpsampleOp(const T &op, int32_t dim, index_t n) : op_(op), dim_(dim), n_(n) {
+          MATX_LOG_TRACE("{} constructor: dim={}, n={}, rank={}", str(), dim, n, Rank());
         };
 
         template <typename CapType, typename... Is>
diff --git a/include/matx/operators/var.h b/include/matx/operators/var.h
index fee055f19..d4810b66b 100644
--- a/include/matx/operators/var.h
+++ b/include/matx/operators/var.h
@@ -61,6 +61,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "var(" + get_type_str(a_) + ")"; }
       __MATX_INLINE__ VarOp(const OpA &a, int ddof) : a_(a), ddof_(ddof) { 
+        MATX_LOG_TRACE("{} constructor: rank={}, ddof={}", str(), Rank(), ddof);
         for (int r = 0; r < ORank; r++) {
           out_dims_[r] = a_.Size(r);
         }        
diff --git a/include/matx/operators/zipvec.h b/include/matx/operators/zipvec.h
index 53cea3e4c..9f31ac6c4 100644
--- a/include/matx/operators/zipvec.h
+++ b/include/matx/operators/zipvec.h
@@ -74,6 +74,7 @@ namespace matx
 
       __MATX_INLINE__ ZipVecOp(const Ts&... ts) : ops_(ts...)
       {
+        MATX_LOG_TRACE("{} constructor: num_ops={}, rank={}", str(), sizeof...(Ts), Rank());
         static_assert(sizeof...(Ts) > 0 && sizeof...(Ts) <= 4, "Must have between 1 and 4 operators for zipvec");
         static_assert((... && (RANK == Ts::Rank())), "zipped ops must have the same rank");
         // All ops must have the same scalar value type; that is enforced by AggregateToVecType
diff --git a/include/matx/transforms/chol/chol_cuda.h b/include/matx/transforms/chol/chol_cuda.h
index c610c7173..3bf28ae08 100644
--- a/include/matx/transforms/chol/chol_cuda.h
+++ b/include/matx/transforms/chol/chol_cuda.h
@@ -303,8 +303,10 @@ void chol_impl(OutputTensor &&out, const ATensor &a,
   auto params = detail::matxDnCholCUDAPlan_t<OutputTensor, decltype(tmp_out)>::GetCholParams(tmp_out, uplo_cusolver, exec);
 
   using cache_val_type = detail::matxDnCholCUDAPlan_t<OutputTensor, decltype(tmp_out)>;
+  auto cache_id = detail::GetCacheIdFromType<detail::chol_cuda_cache_t>();
+  MATX_LOG_DEBUG("Cholesky transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::chol_cuda_cache_t>(
-    detail::GetCacheIdFromType<detail::chol_cuda_cache_t>(),
+    cache_id,
     params,
     [&]() {
       return std::make_shared<cache_val_type>(tmp_out, exec, uplo_cusolver);
diff --git a/include/matx/transforms/convert/dense2sparse_cusparse.h b/include/matx/transforms/convert/dense2sparse_cusparse.h
index 230536df0..fc658d756 100644
--- a/include/matx/transforms/convert/dense2sparse_cusparse.h
+++ b/include/matx/transforms/convert/dense2sparse_cusparse.h
@@ -310,8 +310,10 @@ void dense2sparse_impl(OutputTensorType &o, const InputTensorType &A,
 
   // Lookup and cache.
   using cache_val_type = detail::Dense2SparseHandle_t<otype, atype>;
+  auto cache_id = detail::GetCacheIdFromType<detail::dense2sparse_cache_t>();
+  MATX_LOG_DEBUG("Dense2Sparse transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::dense2sparse_cache_t>(
-      detail::GetCacheIdFromType<detail::dense2sparse_cache_t>(), params,
+      cache_id, params,
       [&]() { return std::make_shared<cache_val_type>(o, a, stream); },
       [&](std::shared_ptr<cache_val_type> cache_type) {
         cache_type->Exec(o, a);
diff --git a/include/matx/transforms/convert/sparse2dense_cusparse.h b/include/matx/transforms/convert/sparse2dense_cusparse.h
index 183935cab..30a13679f 100644
--- a/include/matx/transforms/convert/sparse2dense_cusparse.h
+++ b/include/matx/transforms/convert/sparse2dense_cusparse.h
@@ -257,8 +257,10 @@ void sparse2dense_impl(OutputTensorType &O, const InputTensorType &a,
 
   // Lookup and cache.
   using cache_val_type = detail::Sparse2DenseHandle_t<otype, atype>;
+  auto cache_id = detail::GetCacheIdFromType<detail::sparse2dense_cache_t>();
+  MATX_LOG_DEBUG("Sparse2Dense transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::sparse2dense_cache_t>(
-      detail::GetCacheIdFromType<detail::sparse2dense_cache_t>(), params,
+      cache_id, params,
       [&]() { return std::make_shared<cache_val_type>(o, a, stream); },
       [&](std::shared_ptr<cache_val_type> cache_type) {
         cache_type->Exec(o, a);
diff --git a/include/matx/transforms/convert/sparse2sparse_cusparse.h b/include/matx/transforms/convert/sparse2sparse_cusparse.h
index 1fffeebcd..8266f916d 100644
--- a/include/matx/transforms/convert/sparse2sparse_cusparse.h
+++ b/include/matx/transforms/convert/sparse2sparse_cusparse.h
@@ -226,8 +226,10 @@ void sparse2sparse_impl(OutputTensorType &o, const InputTensorType &a,
 
   // Lookup and cache.
   using cache_val_type = detail::Sparse2SparseHandle_t<otype, atype>;
+  auto cache_id = detail::GetCacheIdFromType<detail::sparse2sparse_cache_t>();
+  MATX_LOG_DEBUG("Sparse2Sparse transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::sparse2sparse_cache_t>(
-      detail::GetCacheIdFromType<detail::sparse2sparse_cache_t>(), params,
+      cache_id, params,
       [&]() { return std::make_shared<cache_val_type>(o, a, stream); },
       [&](std::shared_ptr<cache_val_type> cache_type) {
         cache_type->Exec(o, a);
diff --git a/include/matx/transforms/cov.h b/include/matx/transforms/cov.h
index 1afcd51d4..10161942b 100644
--- a/include/matx/transforms/cov.h
+++ b/include/matx/transforms/cov.h
@@ -240,8 +240,10 @@ void cov_impl(TensorTypeC &c, const TensorTypeA &a,
   auto params = detail::matxCovHandle_t<TensorTypeC, TensorTypeA>::GetCovParams(c, a, stream);
 
   using cache_val_type = detail::matxCovHandle_t<TensorTypeC, TensorTypeA>;
+  auto cache_id = detail::GetCacheIdFromType<detail::cov_cache_t>();
+  MATX_LOG_DEBUG("Covariance transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::cov_cache_t>(
-    detail::GetCacheIdFromType<detail::cov_cache_t>(),
+    cache_id,
     params,
     [&]() {
       return std::make_shared<cache_val_type>(c, a);
diff --git a/include/matx/transforms/cub.h b/include/matx/transforms/cub.h
index 4ee596a8c..447ec9dff 100644
--- a/include/matx/transforms/cub.h
+++ b/include/matx/transforms/cub.h
@@ -1502,8 +1502,10 @@ void sort_impl_inner(OutputTensor &a_out, const InputOperator &a,
                             detail::CUB_OP_RADIX_SORT>::GetCubParams(a_out, a, stream);
 
   using cache_val_type = detail::matxCubPlan_t<OutputTensor, InputOperator, detail::CUB_OP_RADIX_SORT, detail::SortParams_t>;
+  auto cache_id = detail::GetCacheIdFromType<detail::cub_cache_t>();
+  MATX_LOG_DEBUG("CUB radix sort transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::cub_cache_t>(
-      detail::GetCacheIdFromType<detail::cub_cache_t>(),
+      cache_id,
       params,
       [&]() {
         return std::make_shared<cache_val_type>(a_out, a, p, stream);
@@ -1701,8 +1703,10 @@ void cub_reduce(OutputTensor &a_out, const InputOperator &a, typename InputOpera
                             detail::CUB_OP_REDUCE,
                             param_type>::GetCubParams(a_out, a, stream);
   using cache_val_type = detail::matxCubPlan_t<OutputTensor, InputOperator, detail::CUB_OP_REDUCE, param_type>;
+  auto cache_id = detail::GetCacheIdFromType<detail::cub_cache_t>();
+  MATX_LOG_DEBUG("CUB reduce transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::cub_cache_t>(
-    detail::GetCacheIdFromType<detail::cub_cache_t>(),
+    cache_id,
     params,
     [&]() {
       return std::make_shared<cache_val_type>(a_out, a, reduce_params, stream);
@@ -1751,8 +1755,10 @@ void cub_sum(OutputTensor &a_out, const InputOperator &a,
                             detail::CUB_OP_REDUCE_SUM>::GetCubParams(a_out, a, stream);
 
   using cache_val_type = detail::matxCubPlan_t<OutputTensor, InputOperator, detail::CUB_OP_REDUCE_SUM, detail::EmptyParams_t>;
+  auto cache_id = detail::GetCacheIdFromType<detail::cub_cache_t>();
+  MATX_LOG_DEBUG("CUB reduce sum transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::cub_cache_t>(
-      detail::GetCacheIdFromType<detail::cub_cache_t>(),
+      cache_id,
       params,
       [&]() {
         return std::make_shared<cache_val_type>(a_out, a, detail::EmptyParams_t{}, stream);
@@ -1796,8 +1802,10 @@ void cub_min(OutputTensor &a_out, const InputOperator &a,
                             detail::CUB_OP_REDUCE_MIN>::GetCubParams(a_out, a, stream);
 
   using cache_val_type = detail::matxCubPlan_t<OutputTensor, InputOperator, detail::CUB_OP_REDUCE_MIN, detail::EmptyParams_t>;
+  auto cache_id = detail::GetCacheIdFromType<detail::cub_cache_t>();
+  MATX_LOG_DEBUG("CUB reduce min transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::cub_cache_t>(
-      detail::GetCacheIdFromType<detail::cub_cache_t>(),
+      cache_id,
       params,
       [&]() {
         return std::make_shared<cache_val_type>(a_out, a, detail::EmptyParams_t{}, stream);
@@ -1842,8 +1850,10 @@ void cub_max(OutputTensor &a_out, const InputOperator &a,
                             detail::CUB_OP_REDUCE_MAX>::GetCubParams(a_out, a, stream);
 
   using cache_val_type = detail::matxCubPlan_t<OutputTensor, InputOperator, detail::CUB_OP_REDUCE_MAX, detail::EmptyParams_t>;
+  auto cache_id = detail::GetCacheIdFromType<detail::cub_cache_t>();
+  MATX_LOG_DEBUG("CUB reduce max transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::cub_cache_t>(
-      detail::GetCacheIdFromType<detail::cub_cache_t>(),
+      cache_id,
       params,
       [&]() {
         return std::make_shared<cache_val_type>(a_out, a, detail::EmptyParams_t{}, stream);
@@ -1907,8 +1917,10 @@ void cub_argreduce(OutputTensor &a_out, TensorIndexType &aidx_out, const InputOp
   #ifndef MATX_DISABLE_CUB_CACHE
     auto params = cache_val_type::GetCubParams(a_out_supported, aidx_out_supported, a_supported, detail::CUB_OP_SINGLE_ARG_REDUCE, stream);
 
+    auto cache_id = detail::GetCacheIdFromType<detail::cub_cache_t>();
+    MATX_LOG_DEBUG("CUB single arg reduce transform: cache_id={}", cache_id);
     detail::GetCache().LookupAndExec<detail::cub_cache_t>(
-        detail::GetCacheIdFromType<detail::cub_cache_t>(),
+        cache_id,
         params,
         [&]() {
           return std::make_shared<cache_val_type>(a_out_supported, aidx_out_supported, a_supported, reduce_params, stream);
@@ -1976,8 +1988,10 @@ void cub_dualargreduce(OutputTensor &a1_out,
   #ifndef MATX_DISABLE_CUB_CACHE
     auto params = cache_val_type::GetCubParams(a1_out, aidx1_out, a2_out, aidx2_out, a, detail::CUB_OP_DUAL_ARG_REDUCE, stream);
 
+    auto cache_id = detail::GetCacheIdFromType<detail::cub_cache_t>();
+    MATX_LOG_DEBUG("CUB dual arg reduce transform: cache_id={}", cache_id);
     detail::GetCache().LookupAndExec<detail::cub_cache_t>(
-        detail::GetCacheIdFromType<detail::cub_cache_t>(),
+        cache_id,
         params,
         [&]() {
           return std::make_shared<cache_val_type>(a1_out, aidx1_out, a2_out, aidx2_out, a, reduce_params, stream);
@@ -2256,8 +2270,10 @@ void cumsum_impl(OutputTensor &a_out, const InputOperator &a,
       detail::matxCubPlan_t<OutputTensor, InputOperator, detail::CUB_OP_INC_SUM>::GetCubParams(a_out, a, stream);
 
   using cache_val_type = detail::matxCubPlan_t<OutputTensor, InputOperator, detail::CUB_OP_INC_SUM, detail::EmptyParams_t>;
+  auto cache_id = detail::GetCacheIdFromType<detail::cub_cache_t>();
+  MATX_LOG_DEBUG("CUB cumsum transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::cub_cache_t>(
-      detail::GetCacheIdFromType<detail::cub_cache_t>(),
+      cache_id,
       params,
       [&]() {
         return std::make_shared<cache_val_type>(a_out, a, detail::EmptyParams_t{}, stream);
@@ -2353,8 +2369,10 @@ void hist_impl(OutputTensor &a_out, const InputOperator &a,
                             detail::CUB_OP_HIST_EVEN>::GetCubParams(a_out, a, stream);
 
   using cache_val_type = detail::matxCubPlan_t<OutputTensor, InputOperator, detail::CUB_OP_HIST_EVEN, param_type>;
+  auto cache_id = detail::GetCacheIdFromType<detail::cub_cache_t>();
+  MATX_LOG_DEBUG("CUB histogram transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::cub_cache_t>(
-      detail::GetCacheIdFromType<detail::cub_cache_t>(),
+      cache_id,
       params,
       [&]() {
         return std::make_shared<cache_val_type>(a_out, a, hp, stream);
@@ -2494,8 +2512,10 @@ void find_impl(OutputTensor &a_out, CountTensor &num_found, const InputOperator
                             detail::CUB_OP_SELECT_VALS,
                             param_type>::GetCubParams(a_out, a, stream);
   using cache_val_type = detail::matxCubPlan_t<OutputTensor, InputOperator, detail::CUB_OP_SELECT_VALS, param_type>;
+  auto cache_id = detail::GetCacheIdFromType<detail::cub_cache_t>();
+  MATX_LOG_DEBUG("CUB find values transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::cub_cache_t>(
-      detail::GetCacheIdFromType<detail::cub_cache_t>(),
+      cache_id,
       params,
       [&]() {
         return std::make_shared<cache_val_type>(a_out, a, cparams, stream);
@@ -2613,8 +2633,10 @@ void find_idx_impl(OutputTensor &a_out, CountTensor &num_found, const InputOpera
                             detail::CUB_OP_SELECT_IDX,
                             param_type>::GetCubParams(a_out, a, stream);
   using cache_val_type = detail::matxCubPlan_t<OutputTensor, InputOperator, detail::CUB_OP_SELECT_IDX, param_type>;
+  auto cache_id = detail::GetCacheIdFromType<detail::cub_cache_t>();
+  MATX_LOG_DEBUG("CUB find indices transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::cub_cache_t>(
-      detail::GetCacheIdFromType<detail::cub_cache_t>(),
+      cache_id,
       params,
       [&]() {
         return std::make_shared<cache_val_type>(a_out, a, cparams, stream);
@@ -2732,8 +2754,10 @@ void unique_impl(OutputTensor &a_out, CountTensor &num_found, const InputOperato
                             detail::CUB_OP_UNIQUE,
                             param_type>::GetCubParams(a_out, a, stream);
   using cache_val_type = detail::matxCubPlan_t<OutputTensor, InputOperator, detail::CUB_OP_UNIQUE, param_type>;
+  auto cache_id = detail::GetCacheIdFromType<detail::cub_cache_t>();
+  MATX_LOG_DEBUG("CUB unique transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::cub_cache_t>(
-      detail::GetCacheIdFromType<detail::cub_cache_t>(),
+      cache_id,
       params,
       [&]() {
         return std::make_shared<cache_val_type>(a_out, a, cparams, stream);
diff --git a/include/matx/transforms/eig/eig_cuda.h b/include/matx/transforms/eig/eig_cuda.h
index 572cec94d..aa19ee431 100644
--- a/include/matx/transforms/eig/eig_cuda.h
+++ b/include/matx/transforms/eig/eig_cuda.h
@@ -348,8 +348,10 @@ void eig_impl(OutputTensor &&out, WTensor &&w,
 
   // Get cache or new eigen plan if it doesn't exist
   using cache_val_type = detail::matxDnEigCUDAPlan_t<OutputTensor, decltype(w_new), decltype(a_new)>;
+  auto cache_id = detail::GetCacheIdFromType<detail::eig_cuda_cache_t>();
+  MATX_LOG_DEBUG("Eigenvalue transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::eig_cuda_cache_t>(
-    detail::GetCacheIdFromType<detail::eig_cuda_cache_t>(),
+    cache_id,
     params,
     [&]() {
       return std::make_shared<cache_val_type>(w_new, tv, exec, jobz_cusolver, uplo_cusolver);
diff --git a/include/matx/transforms/einsum.h b/include/matx/transforms/einsum.h
index dc3dc10a4..8ac230ea2 100644
--- a/include/matx/transforms/einsum.h
+++ b/include/matx/transforms/einsum.h
@@ -585,8 +585,10 @@ namespace cutensor {
 
     params.stream = stream;
 
+    auto cache_id = detail::GetCacheIdFromType<einsum_cache_t>();
+    MATX_LOG_DEBUG("Einsum transform: cache_id={}", cache_id);
     detail::GetCache().LookupAndExec<einsum_cache_t>(
-        detail::GetCacheIdFromType<einsum_cache_t>(),
+        cache_id,
         params,
         [&]() {
             return cuda::std::apply([&](auto&&... args) {
diff --git a/include/matx/transforms/fft/fft_cuda.h b/include/matx/transforms/fft/fft_cuda.h
index 218f184bc..4b31dcc18 100644
--- a/include/matx/transforms/fft/fft_cuda.h
+++ b/include/matx/transforms/fft/fft_cuda.h
@@ -706,8 +706,10 @@ __MATX_INLINE__ void fft_impl(OutputTensor o, const InputTensor i,
   params.stream = stream;
 
   using cache_val_type = detail::matxCUDAFFTPlan1D_t<decltype(out), decltype(in)>;
+  auto cache_id = detail::GetCacheIdFromType<detail::fft_cuda_cache_t>();
+  MATX_LOG_DEBUG("FFT1D forward transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::fft_cuda_cache_t>(
-    detail::GetCacheIdFromType<detail::fft_cuda_cache_t>(),
+    cache_id,
     params,
     [&]() {
       return std::make_shared<cache_val_type>(out, in);
@@ -752,8 +754,10 @@ __MATX_INLINE__ void ifft_impl(OutputTensor o, const InputTensor i,
   params.stream = stream;
 
   using cache_val_type = detail::matxCUDAFFTPlan1D_t<decltype(out), decltype(in)>;
+  auto cache_id = detail::GetCacheIdFromType<detail::fft_cuda_cache_t>();
+  MATX_LOG_DEBUG("FFT1D inverse transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::fft_cuda_cache_t>(
-    detail::GetCacheIdFromType<detail::fft_cuda_cache_t>(),
+    cache_id,
     params,
     [&]() {
       return std::make_shared<cache_val_type>(out, in);
@@ -793,8 +797,10 @@ __MATX_INLINE__ void fft2_impl(OutputTensor o, const InputTensor i, FFTNorm norm
   params.stream = stream;
 
   using cache_val_type = detail::matxCUDAFFTPlan2D_t<decltype(out), decltype(in)>;
+  auto cache_id = detail::GetCacheIdFromType<detail::fft_cuda_cache_t>();
+  MATX_LOG_DEBUG("FFT2D forward transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::fft_cuda_cache_t>(
-    detail::GetCacheIdFromType<detail::fft_cuda_cache_t>(),
+    cache_id,
     params,
     [&]() {
       return std::make_shared<cache_val_type>(out, in);
@@ -835,8 +841,10 @@ __MATX_INLINE__ void ifft2_impl(OutputTensor o, const InputTensor i, FFTNorm nor
 
   // Get cache or new FFT plan if it doesn't exist
   using cache_val_type = detail::matxCUDAFFTPlan2D_t<decltype(out), decltype(in)>;
+  auto cache_id = detail::GetCacheIdFromType<detail::fft_cuda_cache_t>();
+  MATX_LOG_DEBUG("FFT2D inverse transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::fft_cuda_cache_t>(
-    detail::GetCacheIdFromType<detail::fft_cuda_cache_t>(),
+    cache_id,
     params,
     [&]() {
       return std::make_shared<cache_val_type>(out, in);
diff --git a/include/matx/transforms/filter.h b/include/matx/transforms/filter.h
index 51ecbc511..1e5c53d06 100644
--- a/include/matx/transforms/filter.h
+++ b/include/matx/transforms/filter.h
@@ -477,8 +477,10 @@ void filter_impl([[maybe_unused]] OutType &o, [[maybe_unused]] const InType &i,
   params.hash = rhash + nrhash;
 
   using cache_val_type = detail::matxFilter_t<NR, NNR, OutType, InType, FilterType>;
+  auto cache_id = detail::GetCacheIdFromType<detail::filter_cache_t>();
+  MATX_LOG_DEBUG("Filter transform: cache_id={}, NR={}, NNR={}", cache_id, NR, NNR);
   detail::GetCache().LookupAndExec<detail::filter_cache_t>(
-    detail::GetCacheIdFromType<detail::filter_cache_t>(),
+    cache_id,
     params,
     [&]() {
       return matxMakeFilter(o, i, h_rec, h_nonrec);
diff --git a/include/matx/transforms/inverse.h b/include/matx/transforms/inverse.h
index 368c2fe73..e2b9f68f2 100644
--- a/include/matx/transforms/inverse.h
+++ b/include/matx/transforms/inverse.h
@@ -593,8 +593,10 @@ void inv_impl(TensorTypeAInv &a_inv, const TensorTypeA &a,
   auto params = detail::matxInversePlan_t<TensorTypeAInv, TensorTypeA, ALGO>::GetInverseParams(a_inv, a, stream);
 
   using cache_val_type = detail::matxInversePlan_t<TensorTypeAInv, TensorTypeA, ALGO>;
+  auto cache_id = detail::GetCacheIdFromType<detail::inv_cache_t>();
+  MATX_LOG_DEBUG("Inverse transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::inv_cache_t>(
-    detail::GetCacheIdFromType<detail::inv_cache_t>(),
+    cache_id,
     params,
     [&]() {
       return std::make_shared<cache_val_type>(a_inv, a, stream);
diff --git a/include/matx/transforms/lu/lu_cuda.h b/include/matx/transforms/lu/lu_cuda.h
index ca3c5fa67..229e8874c 100644
--- a/include/matx/transforms/lu/lu_cuda.h
+++ b/include/matx/transforms/lu/lu_cuda.h
@@ -292,8 +292,10 @@ void lu_impl(OutputTensor &&out, PivotTensor &&piv,
 
   // Get cache or new LU plan if it doesn't exist
   using cache_val_type = detail::matxDnLUCUDAPlan_t<OutputTensor, decltype(piv_new), decltype(a_new)>;
+  auto cache_id = detail::GetCacheIdFromType<detail::lu_cuda_cache_t>();
+  MATX_LOG_DEBUG("LU transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::lu_cuda_cache_t>(
-    detail::GetCacheIdFromType<detail::lu_cuda_cache_t>(),
+    cache_id,
     params,
     [&]() {
       return std::make_shared<cache_val_type>(piv_new, tvt, exec);
diff --git a/include/matx/transforms/matmul/matmul_cuda.h b/include/matx/transforms/matmul/matmul_cuda.h
index 7024510ac..710600585 100644
--- a/include/matx/transforms/matmul/matmul_cuda.h
+++ b/include/matx/transforms/matmul/matmul_cuda.h
@@ -1230,8 +1230,10 @@ void matmul_impl(TensorTypeC C, const TensorTypeA A,
     params.stream = stream;
 
     using cache_val_type = detail::MatMulCUDAHandle_t<ctype, atype, btype, PROV>;
+    auto cache_id = detail::GetCacheIdFromType<detail::gemm_cuda_cache_t>();
+    MATX_LOG_DEBUG("MatMul transform: cache_id={}", cache_id);
     detail::GetCache().LookupAndExec<detail::gemm_cuda_cache_t>(
-      detail::GetCacheIdFromType<detail::gemm_cuda_cache_t>(),
+      cache_id,
       params,
       [&]() {
         return std::make_shared<cache_val_type>(c, a, b);
diff --git a/include/matx/transforms/matmul/matmul_cusparse.h b/include/matx/transforms/matmul/matmul_cusparse.h
index 3ac0664cd..83ca78240 100644
--- a/include/matx/transforms/matmul/matmul_cusparse.h
+++ b/include/matx/transforms/matmul/matmul_cusparse.h
@@ -320,8 +320,10 @@ void sparse_matmul_impl(TensorTypeC &C, const TensorTypeA &a,
 
   // Lookup and cache.
   using cache_val_type = detail::MatMulCUSPARSEHandle_t<ctype, atype, btype>;
+  auto cache_id = detail::GetCacheIdFromType<detail::gemm_cusparse_cache_t>();
+  MATX_LOG_DEBUG("MatMul CUSPARSE transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::gemm_cusparse_cache_t>(
-      detail::GetCacheIdFromType<detail::gemm_cusparse_cache_t>(), params,
+      cache_id, params,
       [&]() {
         return std::make_shared<cache_val_type>(c, a, b, stream, alpha, beta);
       },
diff --git a/include/matx/transforms/matmul/matvec_cusparse.h b/include/matx/transforms/matmul/matvec_cusparse.h
index d3f979793..322071063 100644
--- a/include/matx/transforms/matmul/matvec_cusparse.h
+++ b/include/matx/transforms/matmul/matvec_cusparse.h
@@ -343,8 +343,10 @@ void sparse_matvec_impl(TensorTypeC &C, const TensorTypeA &a,
 
     // Lookup and cache.
     using cache_val_type = detail::MatVecCUSPARSEHandle_t<ctype, atype, btype>;
+    auto cache_id = detail::GetCacheIdFromType<detail::spmv_cusparse_cache_t>();
+    MATX_LOG_DEBUG("MatVec CUSPARSE transform: cache_id={}", cache_id);
     detail::GetCache().LookupAndExec<detail::spmv_cusparse_cache_t>(
-        detail::GetCacheIdFromType<detail::spmv_cusparse_cache_t>(), params,
+        cache_id, params,
         [&]() {
           return std::make_shared<cache_val_type>(c, a, b, stream, alpha, beta);
         },
diff --git a/include/matx/transforms/qr/qr_cuda.h b/include/matx/transforms/qr/qr_cuda.h
index 5d677db49..73bf6686d 100644
--- a/include/matx/transforms/qr/qr_cuda.h
+++ b/include/matx/transforms/qr/qr_cuda.h
@@ -475,8 +475,10 @@ void qr_solver_impl(OutTensor &&out, TauTensor &&tau,
 
   // Get cache or new QR plan if it doesn't exist
   using cache_val_type = detail::matxDnQRCUDAPlan_t<OutTensor, decltype(tau_new), decltype(a_new)>;
+  auto cache_id = detail::GetCacheIdFromType<detail::qr_cuda_cache_t>();
+  MATX_LOG_DEBUG("QR transform (full): cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::qr_cuda_cache_t>(
-    detail::GetCacheIdFromType<detail::qr_cuda_cache_t>(),
+    cache_id,
     params,
     [&]() {
       return std::make_shared<cache_val_type>(tau_new, tvt, exec);
@@ -848,8 +850,10 @@ void qr_econ_impl(OutTensor &&out, RTensor &&out_r,
 
   // Get cache or new QR plan if it doesn't exist
   using cache_val_type = detail::matxDnEconQRCUDAPlan_t<OutTensor, RTensor, decltype(tau_new), decltype(a_new)>;
+  auto cache_id = detail::GetCacheIdFromType<detail::qr_cuda_cache_t>();
+  MATX_LOG_DEBUG("QR transform (economic): cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::qr_cuda_cache_t>(
-    detail::GetCacheIdFromType<detail::qr_cuda_cache_t>(),
+    cache_id,
     params,
     [&]() {
       return std::make_shared<cache_val_type>(tau_new, tvt, exec);
diff --git a/include/matx/transforms/solve/solve_cudss.h b/include/matx/transforms/solve/solve_cudss.h
index 2aa324377..568003e34 100644
--- a/include/matx/transforms/solve/solve_cudss.h
+++ b/include/matx/transforms/solve/solve_cudss.h
@@ -286,8 +286,10 @@ void sparse_solve_impl(TensorTypeC &C, const TensorTypeA &a,
 
   // Lookup and cache.
   using cache_val_type = detail::SolveCUDSSHandle_t<ctype, atype, btype>;
+  auto cache_id = detail::GetCacheIdFromType<detail::gemm_cudss_cache_t>();
+  MATX_LOG_DEBUG("Solve CUDSS transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::gemm_cudss_cache_t>(
-      detail::GetCacheIdFromType<detail::gemm_cudss_cache_t>(), params,
+      cache_id, params,
       [&]() { return std::make_shared<cache_val_type>(c, a, b, stream); },
       [&](std::shared_ptr<cache_val_type> cache_type) {
         cache_type->Exec(c, a, b);
diff --git a/include/matx/transforms/svd/svd_cuda.h b/include/matx/transforms/svd/svd_cuda.h
index ac664a37e..4b602c396 100644
--- a/include/matx/transforms/svd/svd_cuda.h
+++ b/include/matx/transforms/svd/svd_cuda.h
@@ -1001,8 +1001,10 @@ void svd_impl(UTensor &&u, STensor &&s,
 
     // Get cache or new SVD plan if it doesn't exist
     using cache_val_type = detail::matxDnSVDCUDAPlan_t<decltype(u_in), decltype(s_new), decltype(vt_in), decltype(at_col_maj)>;
+    auto cache_id = detail::GetCacheIdFromType<detail::svd_cuda_cache_t>();
+    MATX_LOG_DEBUG("SVD transform (full): cache_id={}", cache_id);
     detail::GetCache().LookupAndExec<detail::svd_cuda_cache_t>(
-      detail::GetCacheIdFromType<detail::svd_cuda_cache_t>(),
+      cache_id,
       params,
       [&]() {
         return std::make_shared<cache_val_type>(u_in, s_new, vt_in, at_col_maj, method, exec, job_cusolver);
@@ -1039,8 +1041,10 @@ void svd_impl(UTensor &&u, STensor &&s,
 
     // Get cache or new SVD plan if it doesn't exist
     using cache_val_type = detail::matxDnSVDCUDAPlan_t<decltype(u_col_maj), decltype(s_new), decltype(vt_col_maj), decltype(tvt)>;
+    auto cache_id = detail::GetCacheIdFromType<detail::svd_cuda_cache_t>();
+    MATX_LOG_DEBUG("SVD transform (vectors): cache_id={}", cache_id);
     detail::GetCache().LookupAndExec<detail::svd_cuda_cache_t>(
-      detail::GetCacheIdFromType<detail::svd_cuda_cache_t>(),
+      cache_id,
       params,
       [&]() {
         return std::make_shared<cache_val_type>(u_col_maj, s_new, vt_col_maj, tvt, method, exec, job_cusolver);

From 419daae52c5a1175c67d22f77c72aded6d018b7f Mon Sep 17 00:00:00 2001
From: cliffburdick <cburdick@nvidia.com>
Date: Thu, 30 Oct 2025 17:40:02 -0700
Subject: [PATCH 2/2] Fix make_tensor debug log format argument (cast pointer to const void*)

---
 include/matx/core/make_tensor.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/matx/core/make_tensor.h b/include/matx/core/make_tensor.h
index 815eab965..a246547a1 100644
--- a/include/matx/core/make_tensor.h
+++ b/include/matx/core/make_tensor.h
@@ -80,7 +80,7 @@ template <typename T, typename ShapeType,
 auto make_tensor(Storage<T> storage, ShapeType &&shape) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
   
-  MATX_LOG_DEBUG("make_tensor<T,ShapeType>(storage, shape): ptr={}", storage.data());
+  MATX_LOG_DEBUG("make_tensor<T,ShapeType>(storage, shape): ptr={}", reinterpret_cast<const void*>(storage.data()));
 
   constexpr int RANK = static_cast<int>(cuda::std::tuple_size<typename remove_cvref<ShapeType>::type>::value);
   DefaultDescriptor<RANK> desc{std::forward<ShapeType>(shape)};