diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9948d173..59e0ceb1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -81,6 +81,7 @@ option(MATX_EN_CUDA_LINEINFO "Enable line information for CUDA kernels via -line
 option(MATX_EN_EXTENDED_LAMBDA "Enable extended lambda support for device/host lambdas" ON)
 option(MATX_EN_MATHDX "Enable MathDx support for kernel fusion" OFF)
 option(MATX_EN_UNSAFE_ALIAS_DETECTION "Enable aliased memory detection" OFF)
+option(MATX_DISABLE_EXCEPTIONS "Disable C++ exceptions and log errors instead" OFF)
 
 set(MATX_EN_PYBIND11 OFF CACHE BOOL "Enable pybind11 support")
 
@@ -206,6 +207,9 @@ if (MATX_NVTX_FLAGS)
     add_definitions(-DMATX_NVTX_FLAGS)
     target_compile_definitions(matx INTERFACE MATX_NVTX_FLAGS)
 endif()
+if (MATX_DISABLE_EXCEPTIONS)
+    target_compile_definitions(matx INTERFACE MATX_DISABLE_EXCEPTIONS)
+endif()
 if (MATX_BUILD_32_BIT)
     set(MATX_NVPL_INT_TYPE "lp64")
     target_compile_definitions(matx INTERFACE MATX_INDEX_32_BIT)
diff --git a/docs_input/build.rst b/docs_input/build.rst
index 519cec8f..60790249 100644
--- a/docs_input/build.rst
+++ b/docs_input/build.rst
@@ -201,6 +201,8 @@ By default, all of these options are OFF.
     - ``-DMATX_EN_MATHDX=ON``
   * - Enable pybind11 Support. This option is usually not explicitly set, but is enabled by other options.
     - ``-DMATX_EN_PYBIND11=ON``
+  * - Disable C++ exceptions. Errors are logged and the process aborts instead of throwing.
+    - ``-DMATX_DISABLE_EXCEPTIONS=ON``
 
 
 NVTX Flags
diff --git a/examples/black_scholes.cu b/examples/black_scholes.cu
index 77e872cd..38996b82 100644
--- a/examples/black_scholes.cu
+++ b/examples/black_scholes.cu
@@ -150,16 +150,15 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
   index_t input_size = 100'000'000;
   constexpr uint32_t num_iterations = 100;
   float time_ms;
-
-  tensor_t<dtype, 1> K_tensor{{input_size}};
-  tensor_t<dtype, 1> S_tensor{{input_size}};
-  tensor_t<dtype, 1> V_tensor{{input_size}};
-  tensor_t<dtype, 1> r_tensor{{input_size}};
-  tensor_t<dtype, 1> T_tensor{{input_size}};
-  tensor_t<dtype, 1> output_tensor{{input_size}};
-  tensor_t<dtype, 1> output_tensor2{{input_size}};
-  tensor_t<dtype, 1> output_tensor3{{input_size}};
-  tensor_t<dtype, 1> output_tensor4{{input_size}};
+  auto K_tensor = make_tensor<dtype>({input_size});
+  auto S_tensor = make_tensor<dtype>({input_size});
+  auto V_tensor = make_tensor<dtype>({input_size});
+  auto r_tensor = make_tensor<dtype>({input_size});
+  auto T_tensor = make_tensor<dtype>({input_size});
+  auto output_tensor = make_tensor<dtype>({input_size});
+  auto output_tensor2 = make_tensor<dtype>({input_size});
+  auto output_tensor3 = make_tensor<dtype>({input_size});
+  auto output_tensor4 = make_tensor<dtype>({input_size});
 
   (K_tensor = random<float>({input_size}, UNIFORM)).run();
   (S_tensor = random<float>({input_size}, UNIFORM)).run();
@@ -171,8 +170,6 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
   cudaStreamCreate(&stream);
   cudaExecutor exec{stream};
 
-  //compute_black_scholes_matx(K_tensor, S_tensor, V_tensor, r_tensor, T_tensor, output_tensor, exec);
-
   cudaEvent_t start, stop;
   cudaEventCreate(&start);
   cudaEventCreate(&stop);
diff --git a/include/matx/core/allocator.h b/include/matx/core/allocator.h
index d374bdf8..5e43a795 100644
--- a/include/matx/core/allocator.h
+++ b/include/matx/core/allocator.h
@@ -43,6 +43,7 @@
 
 #include "matx/core/error.h"
 #include "matx/core/nvtx.h"
+#include "matx/core/log.h"
 #include <cuda/std/functional>
 #include <cuda/std/__algorithm/max.h>
 
@@ -122,6 +123,9 @@ struct MemTracker {
 
     size_t bytes = iter->second.size;
 
+    MATX_LOG_DEBUG("Deallocating memory: ptr={}, {} bytes, space={}, remaining={} bytes", 
+                   ptr, bytes, static_cast<int>(iter->second.kind), matxMemoryStats.currentBytesAllocated - bytes);
+
     matxMemoryStats.currentBytesAllocated -= bytes;
 
     switch (iter->second.kind) {
@@ -187,6 +191,8 @@ struct MemTracker {
       }
     }
     
+    MATX_LOG_DEBUG("Allocating memory: {} bytes, space={}, stream={}", bytes, static_cast<int>(space), reinterpret_cast<void*>(stream));
+    
     switch (space) {
     case MATX_MANAGED_MEMORY:
       err = cudaMallocManaged(ptr, bytes);
@@ -214,6 +220,8 @@ struct MemTracker {
       MATX_THROW(matxOutOfMemory, "Failed to allocate memory");
     }
 
+    MATX_LOG_DEBUG("Allocated memory: ptr={}, {} bytes, total_current={} bytes", *ptr, bytes, matxMemoryStats.currentBytesAllocated + bytes);
+
     [[maybe_unused]] std::unique_lock lck(memory_mtx);
     matxMemoryStats.currentBytesAllocated += bytes;
     matxMemoryStats.totalBytesAllocated += bytes;
diff --git a/include/matx/core/cache.h b/include/matx/core/cache.h
index 1ac94b34..0279316d 100644
--- a/include/matx/core/cache.h
+++ b/include/matx/core/cache.h
@@ -192,11 +192,15 @@ class matxCache_t {
     auto &common_params_cache = rmap[key];
     auto cache_el = common_params_cache.find(params);
     if (cache_el == common_params_cache.end()) {
+      MATX_LOG_DEBUG("Cache MISS for transform: id={}, device={}, thread={:#x}",
+                     id, key.device_id, std::hash<std::thread::id>{}(key.thread_id));
       std::any tmp = mfun();
       common_params_cache.insert({params, tmp});
       efun(std::any_cast<decltype(mfun())>(tmp));
     }
     else {
+      MATX_LOG_DEBUG("Cache HIT for transform: id={}, device={}, thread={:#x}",
+                     id, key.device_id, std::hash<std::thread::id>{}(key.thread_id));
       efun(std::any_cast<decltype(mfun())>(cache_el->second));
     }
   }
diff --git a/include/matx/core/error.h b/include/matx/core/error.h
index e35ea35f..dd924932 100644
--- a/include/matx/core/error.h
+++ b/include/matx/core/error.h
@@ -42,6 +42,7 @@
 #endif
 
 #include "matx/core/stacktrace.h"
+#include "matx/core/log.h"
 #endif
 
 namespace matx
@@ -150,6 +151,23 @@ namespace matx
   };
   }
 
+#ifdef MATX_DISABLE_EXCEPTIONS
+// Exceptions are compiled out: the handler macros become a plain scope and MATX_THROW logs the error and stack trace, then aborts
+#define MATX_ENTER_HANDLER() {
+#define MATX_EXIT_HANDLER() }
+
+#define MATX_THROW(e, str_arg)                       \
+  do {                                               \
+    MATX_LOG_FATAL("matxException ({}: {}) - {}:{}", matxErrorString(e), str_arg, __FILE__, __LINE__); \
+    std::stringstream matx_stack_trace;              \
+    matx::detail::printStackTrace(matx_stack_trace); \
+    std::string matx_stack_str = matx_stack_trace.str(); \
+    MATX_LOG_FATAL("Stack Trace:\n{}", matx_stack_str); \
+    std::abort();                                    \
+  } while(0)
+
+#else
+
 #define MATX_ENTER_HANDLER() \
   try                        \
   {
@@ -158,8 +176,8 @@ namespace matx
   }                                                             \
   catch (matx::detail::matxException & e)                       \
   {                                                             \
-    fprintf(stderr, "%s\n", e.what());                          \
-    fprintf(stderr, "Stack Trace:\n%s", e.stack.str().c_str()); \
+    MATX_LOG_FATAL("{}", e.what());                             \
+    MATX_LOG_FATAL("Stack Trace:\n{}", e.stack.str());          \
     exit(1);                                                    \
   }
 
@@ -168,6 +186,8 @@ namespace matx
     throw matx::detail::matxException(e, str, __FILE__, __LINE__); \
   }
 
+#endif
+
 #if !defined(NDEBUG) && !defined(__CUDA_ARCH__)
   #define MATX_ASSERT(a, error) \
   {                           \
@@ -190,7 +210,7 @@ namespace matx
     auto tmp = a;                      \
     if ((tmp != expected))             \
     {                                  \
-      std::cout << #a ": " << str << "(" << tmp << " != " << expected << ")\n";\
+      MATX_LOG_ERROR("{}: {} ({} != {})", #a, str, static_cast<long long>(tmp), static_cast<long long>(expected)); \
       MATX_THROW(error, "");           \
     }                                  \
   }
@@ -217,7 +237,7 @@ namespace matx
     const auto e_ = (e);                                        \
     if (e_ != cudaSuccess)                                      \
     {                                                           \
-      fprintf(stderr, "%s:%d CUDA Error: %s (%d)\n", __FILE__,__LINE__, cudaGetErrorString(e_), e_); \
+      MATX_LOG_ERROR("{}:{} CUDA Error: {} ({})", __FILE__, __LINE__, cudaGetErrorString(e_), static_cast<int>(e_)); \
       MATX_THROW(matx::matxCudaError, cudaGetErrorString(e_));  \
     }                                                           \
   } while (0)
@@ -239,21 +259,22 @@ namespace matx
       compatible = (size == 0 || size == Size(i));                   \
     }                                                                \
     if (!compatible) { \
-      std::cerr << "Incompatible operator sizes: ("; \
+      std::string msg = "Incompatible operator sizes: ("; \
       for (int32_t i = 0; i < Rank(); i++) { \
-        std::cerr << Size(i); \
+        msg += std::to_string(Size(i)); \
         if (i != Rank() - 1) { \
-          std::cerr << ","; \
+          msg += ","; \
         } \
       } \
-      std::cerr << ") not compatible with ("; \
+      msg += ") not compatible with ("; \
       for (int32_t i = 0; i < Rank(); i++) { \
-        std::cerr << matx::detail::get_expanded_size<Rank()>(op, i); \
+        msg += std::to_string(matx::detail::get_expanded_size<Rank()>(op, i)); \
         if (i != Rank() - 1) { \
-          std::cerr << ","; \
+          msg += ","; \
         } \
       } \
-      std::cerr << ")" << std::endl; \
+      msg += ")"; \
+      MATX_LOG_ERROR("{}", msg); \
       MATX_THROW(matxInvalidSize, "Incompatible operator sizes"); \
     } \
   }
diff --git a/include/matx/core/log.h b/include/matx/core/log.h
index 094cffc3..5bd93841 100644
--- a/include/matx/core/log.h
+++ b/include/matx/core/log.h
@@ -46,6 +46,83 @@
 #include <memory>
 #include <mutex>
 
+// Include MatX type traits and complex types for formatting support
+#include "matx/core/half.h"
+#include "matx/core/half_complex.h"
+#include <complex>
+#include <cuda/std/complex>
+
+// Helper for formatting complex types
+namespace matx {
+namespace detail {
+  // Render any complex-like value exposing real() and imag() as "(<re><signed im>j)": both parts are cast to double and printed with {:g}; the imaginary part always carries its sign
+  template<typename ComplexType>
+  inline std::string format_complex(const ComplexType& c) {
+    return std::format("({:g}{:+g}j)", 
+                      static_cast<double>(c.real()), 
+                      static_cast<double>(c.imag()));
+  }
+}
+}
+
+// Formatter specializations for all types supported by MatX
+namespace std {
+  // Formatter for std::complex<T>
+  template<typename T>
+  struct formatter<std::complex<T>> {
+    constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
+    
+    template<typename FormatContext>
+    auto format(const std::complex<T>& c, FormatContext& ctx) const {
+      return format_to(ctx.out(), "{}", matx::detail::format_complex(c));
+    }
+  };
+  
+  // Formatter for cuda::std::complex<T>
+  template<typename T>
+  struct formatter<cuda::std::complex<T>> {
+    constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
+    
+    template<typename FormatContext>
+    auto format(const cuda::std::complex<T>& c, FormatContext& ctx) const {
+      return format_to(ctx.out(), "{}", matx::detail::format_complex(c));
+    }
+  };
+  
+  // Formatter for matxHalfComplex (fp16/bf16 complex)
+  template<typename T>
+  struct formatter<matx::matxHalfComplex<T>> {
+    constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
+    
+    template<typename FormatContext>
+    auto format(const matx::matxHalfComplex<T>& c, FormatContext& ctx) const {
+      return format_to(ctx.out(), "{}", matx::detail::format_complex(c));
+    }
+  };
+  
+  // Formatter for matxFp16 (half-precision float)
+  template<>
+  struct formatter<matx::matxFp16> {
+    constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
+    
+    template<typename FormatContext>
+    auto format(const matx::matxFp16& val, FormatContext& ctx) const {
+      return format_to(ctx.out(), "{:g}", static_cast<float>(val));
+    }
+  };
+  
+  // Formatter for matxBf16 (bfloat16)
+  template<>
+  struct formatter<matx::matxBf16> {
+    constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
+    
+    template<typename FormatContext>
+    auto format(const matx::matxBf16& val, FormatContext& ctx) const {
+      return format_to(ctx.out(), "{:g}", static_cast<float>(val));
+    }
+  };
+}
+
 namespace matx {
 namespace detail {
 
@@ -139,7 +216,7 @@ class Logger {
   std::mutex mutex_;
   bool show_function_;
   
-  Logger() : min_level_(LogLevel::OFF), output_stream_(&std::cout), show_function_(false) {
+  Logger() : min_level_(LogLevel::ERROR), output_stream_(&std::cout), show_function_(false) {
     // Read log level from environment
     const char* level_env = std::getenv("MATX_LOG_LEVEL");
     if (level_env) {
diff --git a/include/matx/core/make_tensor.h b/include/matx/core/make_tensor.h
index f226a2df..a246547a 100644
--- a/include/matx/core/make_tensor.h
+++ b/include/matx/core/make_tensor.h
@@ -37,6 +37,7 @@
 #include "matx/core/storage.h"
 #include "matx/core/tensor_desc.h"
 #include "matx/core/dlpack.h"
+#include "matx/core/log.h"
 namespace matx {
 
 /**
@@ -52,6 +53,15 @@ auto make_tensor( const index_t (&shape)[RANK],
                   matxMemorySpace_t space = MATX_MANAGED_MEMORY,
                   cudaStream_t stream = 0) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  std::string shape_str = "[";
+  for (int i = 0; i < RANK; i++) {
+    if (i > 0) shape_str += ",";
+    shape_str += std::to_string(shape[i]);
+  }
+  shape_str += "]";
+  MATX_LOG_DEBUG("make_tensor<T,RANK>(shape, space, stream): shape={}, space={}, stream={}", 
+                 shape_str, static_cast<int>(space), reinterpret_cast<void*>(stream));
 
   DefaultDescriptor<RANK> desc{shape};
   auto storage = make_owning_storage<T>(desc.TotalSize(), space, stream);
@@ -69,6 +79,8 @@ template <typename T, typename ShapeType,
   std::enable_if_t<!is_matx_descriptor_v<ShapeType> && !std::is_array_v<typename remove_cvref<ShapeType>::type>, bool> = true>
 auto make_tensor(Storage<T> storage, ShapeType &&shape) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  MATX_LOG_DEBUG("make_tensor<T,ShapeType>(storage, shape): ptr={}", reinterpret_cast<const void*>(storage.data()));
 
   constexpr int RANK = static_cast<int>(cuda::std::tuple_size<typename remove_cvref<ShapeType>::type>::value);
   DefaultDescriptor<RANK> desc{std::forward<ShapeType>(shape)};
@@ -89,6 +101,15 @@ void make_tensor( TensorType &tensor,
                   matxMemorySpace_t space = MATX_MANAGED_MEMORY,
                   cudaStream_t stream = 0) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  std::string shape_str = "[";
+  for (int i = 0; i < TensorType::Rank(); i++) {
+    if (i > 0) shape_str += ",";
+    shape_str += std::to_string(shape[i]);
+  }
+  shape_str += "]";
+  MATX_LOG_DEBUG("make_tensor(tensor&, shape, space, stream): shape={}, space={}, stream={}", 
+                 shape_str, static_cast<int>(space), reinterpret_cast<void*>(stream));
 
   auto tmp = make_tensor<typename TensorType::value_type, TensorType::Rank()>(shape, space, stream);
   tensor.Shallow(tmp);
@@ -108,6 +129,15 @@ auto make_tensor_p( const index_t (&shape)[RANK],
                     matxMemorySpace_t space = MATX_MANAGED_MEMORY,
                     cudaStream_t stream = 0) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  std::string shape_str = "[";
+  for (int i = 0; i < RANK; i++) {
+    if (i > 0) shape_str += ",";
+    shape_str += std::to_string(shape[i]);
+  }
+  shape_str += "]";
+  MATX_LOG_DEBUG("make_tensor_p<T,RANK>(shape, space, stream): shape={}, space={}, stream={}", 
+                 shape_str, static_cast<int>(space), reinterpret_cast<void*>(stream));
 
   DefaultDescriptor<RANK> desc{shape};
   auto storage = make_owning_storage<T>(desc.TotalSize(), space, stream);
@@ -134,6 +164,9 @@ auto make_tensor( ShapeType &&shape,
                   matxMemorySpace_t space = MATX_MANAGED_MEMORY,
                   cudaStream_t stream = 0) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  MATX_LOG_DEBUG("make_tensor<T,ShapeType>(shape, space, stream): space={}, stream={}", 
+                 static_cast<int>(space), reinterpret_cast<void*>(stream));
 
   constexpr int rank = static_cast<int>(cuda::std::tuple_size<typename remove_cvref<ShapeType>::type>::value);
   DefaultDescriptor<rank> desc{std::move(shape)};
@@ -165,6 +198,9 @@ auto make_tensor( TensorType &tensor,
                   matxMemorySpace_t space = MATX_MANAGED_MEMORY,
                   cudaStream_t stream = 0) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  MATX_LOG_DEBUG("make_tensor(tensor&, shape, space, stream): space={}, stream={}", 
+                 static_cast<int>(space), reinterpret_cast<void*>(stream));
 
   auto tmp = make_tensor<typename TensorType::value_type, ShapeType>(std::forward<ShapeType>(shape), space, stream);
   tensor.Shallow(tmp);
@@ -189,6 +225,9 @@ auto make_tensor_p( ShapeType &&shape,
                     matxMemorySpace_t space = MATX_MANAGED_MEMORY,
                     cudaStream_t stream = 0) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  MATX_LOG_DEBUG("make_tensor_p<T,ShapeType>(shape, space, stream): space={}, stream={}", 
+                 static_cast<int>(space), reinterpret_cast<void*>(stream));
 
   DefaultDescriptor<static_cast<int>(cuda::std::tuple_size<typename remove_cvref<ShapeType>::type>::value)> desc{std::move(shape)};
 
@@ -212,6 +251,8 @@ template <typename T>
 auto make_tensor( [[maybe_unused]] const std::initializer_list<detail::no_size_t> t,
                   matxMemorySpace_t space = MATX_MANAGED_MEMORY,
                   cudaStream_t stream = 0) {
+  MATX_LOG_DEBUG("make_tensor<T>(0D, space, stream): space={}, stream={}", 
+                 static_cast<int>(space), reinterpret_cast<void*>(stream));
   using shape_t = cuda::std::array<index_t, 0>;
   return make_tensor<T, shape_t>(shape_t{}, space, stream);
 }
@@ -230,6 +271,8 @@ template <typename TensorType,
 auto make_tensor( TensorType &tensor,
                   matxMemorySpace_t space = MATX_MANAGED_MEMORY,
                   cudaStream_t stream = 0) {
+  MATX_LOG_DEBUG("make_tensor(tensor&, 0D, space, stream): space={}, stream={}", 
+                 static_cast<int>(space), reinterpret_cast<void*>(stream));
   auto tmp = make_tensor<typename TensorType::value_type>({}, space, stream);
   tensor.Shallow(tmp);
 }
@@ -247,6 +290,8 @@ template <typename T>
 auto make_tensor_p( [[maybe_unused]] const std::initializer_list<detail::no_size_t> t,
                     matxMemorySpace_t space = MATX_MANAGED_MEMORY,
                     cudaStream_t stream = 0) {
+  MATX_LOG_DEBUG("make_tensor_p<T>(0D, space, stream): space={}, stream={}", 
+                 static_cast<int>(space), reinterpret_cast<void*>(stream));
 
   cuda::std::array<index_t, 0> shape;
   return make_tensor_p<T, decltype(shape)>(std::move(shape), space, stream);
@@ -268,6 +313,15 @@ auto make_tensor( T *data,
                   const index_t (&shape)[RANK],
                   bool owning = false) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  std::string shape_str = "[";
+  for (int i = 0; i < RANK; i++) {
+    if (i > 0) shape_str += ",";
+    shape_str += std::to_string(shape[i]);
+  }
+  shape_str += "]";
+  MATX_LOG_DEBUG("make_tensor<T,RANK>(data, shape, owning): ptr={}, shape={}, owning={}", 
+                 reinterpret_cast<const void*>(data), shape_str, owning);
 
   DefaultDescriptor<RANK> desc{shape};
   auto storage = owning ? make_owning_storage<T>(desc.TotalSize()) : make_non_owning_storage<T>(data, desc.TotalSize());
@@ -291,6 +345,15 @@ auto make_tensor( TensorType &tensor,
                   typename TensorType::value_type *data,
                   const index_t (&shape)[TensorType::Rank()]) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  std::string shape_str = "[";
+  for (int i = 0; i < TensorType::Rank(); i++) {
+    if (i > 0) shape_str += ",";
+    shape_str += std::to_string(shape[i]);
+  }
+  shape_str += "]";
+  MATX_LOG_DEBUG("make_tensor(tensor&, data, shape): ptr={}, shape={}", 
+                 reinterpret_cast<const void*>(data), shape_str);
 
   auto tmp = make_tensor<typename TensorType::value_type, TensorType::Rank()>(data, shape, false);
   tensor.Shallow(tmp);
@@ -313,6 +376,9 @@ auto make_tensor( T *data,
                   ShapeType &&shape,
                   bool owning = false) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  MATX_LOG_DEBUG("make_tensor<T,ShapeType>(data, shape, owning): ptr={}, owning={}", 
+                 reinterpret_cast<const void*>(data), owning);
 
   constexpr int RANK = static_cast<int>(cuda::std::tuple_size<typename remove_cvref<ShapeType>::type>::value);
   DefaultDescriptor<RANK>
@@ -338,6 +404,9 @@ auto make_tensor( TensorType &tensor,
                   typename TensorType::value_type *data,
                   typename TensorType::shape_container &&shape) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  MATX_LOG_DEBUG("make_tensor(tensor&, data, shape): ptr={}", reinterpret_cast<const void*>(data));
+  
   auto tmp = make_tensor<typename TensorType::value_type, typename TensorType::shape_container>(data, std::forward<typename TensorType::shape_container>(shape), false);
   tensor.Shallow(tmp);
 }
@@ -356,6 +425,8 @@ template <typename T>
 auto make_tensor( T *ptr,
                   [[maybe_unused]] const std::initializer_list<detail::no_size_t> t,
                   bool owning = false) {
+  MATX_LOG_DEBUG("make_tensor<T>(ptr, 0D, owning): ptr={}, owning={}", 
+                 reinterpret_cast<const void*>(ptr), owning);
   cuda::std::array<index_t, 0> shape;
   return make_tensor<T, decltype(shape)>(ptr, std::move(shape), owning);
 }
@@ -373,6 +444,7 @@ template <typename TensorType,
   std::enable_if_t<is_tensor_view_v<TensorType>, bool> = true>
 auto make_tensor( TensorType &tensor,
                   typename TensorType::value_type *ptr) {
+  MATX_LOG_DEBUG("make_tensor(tensor&, ptr, 0D): ptr={}", reinterpret_cast<const void*>(ptr));
   auto tmp = make_tensor<typename TensorType::value_type>(ptr, false);
   tensor.Shallow(tmp);
 }
@@ -396,6 +468,9 @@ auto make_tensor_p( T *const data,
                     ShapeType &&shape,
                     bool owning = false) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  MATX_LOG_DEBUG("make_tensor_p<T,ShapeType>(data, shape, owning): ptr={}, owning={}", 
+                 reinterpret_cast<const void*>(data), owning);
 
   constexpr int RANK = static_cast<int>(cuda::std::tuple_size<typename remove_cvref<ShapeType>::type>::value);
   DefaultDescriptor<RANK>
@@ -417,6 +492,14 @@ template <typename T, int RANK, typename Allocator>
 auto make_tensor( const index_t (&shape)[RANK],
                   Allocator&& alloc) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  std::string shape_str = "[";
+  for (int i = 0; i < RANK; i++) {
+    if (i > 0) shape_str += ",";
+    shape_str += std::to_string(shape[i]);
+  }
+  shape_str += "]";
+  MATX_LOG_DEBUG("make_tensor<T,RANK,Allocator>(shape, alloc): shape={}", shape_str);
 
   DefaultDescriptor<RANK> desc{shape};
   auto storage = make_owning_storage<T>(desc.TotalSize(), std::forward<Allocator>(alloc));
@@ -438,6 +521,8 @@ template <typename T, typename ShapeType, typename Allocator,
 auto make_tensor( ShapeType &&shape,
                   Allocator&& alloc) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  MATX_LOG_DEBUG("make_tensor<T,ShapeType,Allocator>(shape, alloc)");
 
   constexpr int RANK = static_cast<int>(cuda::std::tuple_size<typename remove_cvref<ShapeType>::type>::value);
   DefaultDescriptor<RANK> desc{std::forward<ShapeType>(shape)};
@@ -461,6 +546,14 @@ void make_tensor( TensorType &tensor,
                   const index_t (&shape)[TensorType::Rank()],
                   Allocator&& alloc) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  std::string shape_str = "[";
+  for (int i = 0; i < TensorType::Rank(); i++) {
+    if (i > 0) shape_str += ",";
+    shape_str += std::to_string(shape[i]);
+  }
+  shape_str += "]";
+  MATX_LOG_DEBUG("make_tensor(tensor&, shape, alloc): shape={}", shape_str);
 
   auto tmp = make_tensor<typename TensorType::value_type, TensorType::Rank()>(shape, std::forward<Allocator>(alloc));
   tensor.Shallow(tmp);
@@ -483,6 +576,8 @@ void make_tensor( TensorType &tensor,
                   ShapeType &&shape,
                   Allocator&& alloc) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  MATX_LOG_DEBUG("make_tensor(tensor&, shape, alloc)");
 
   auto tmp = make_tensor<typename TensorType::value_type>(std::forward<ShapeType>(shape), std::forward<Allocator>(alloc));
   tensor.Shallow(tmp);
@@ -505,6 +600,9 @@ auto make_tensor( T* const data,
                   D &&desc,
                   bool owning = false) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  MATX_LOG_DEBUG("make_tensor<T,D>(data, desc, owning): ptr={}, owning={}", 
+                 reinterpret_cast<const void*>(data), owning);
 
   using Dstrip = typename remove_cvref<D>::type;
   auto storage = owning ? make_owning_storage<T>(desc.TotalSize()) : make_non_owning_storage<T>(data, desc.TotalSize());
@@ -528,6 +626,8 @@ auto make_tensor( TensorType &tensor,
                   typename TensorType::value_type* const data,
                   typename TensorType::desc_type &&desc) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  MATX_LOG_DEBUG("make_tensor(tensor&, data, desc): ptr={}", reinterpret_cast<const void*>(data));
 
   // This tensor should be non-owning regardless of the original ownership since it will go out of scope at the end of the function
   auto tmp = make_tensor<typename TensorType::value_type, typename TensorType::desc_type>(data, std::forward<typename TensorType::desc_type>(desc), false);
@@ -547,6 +647,9 @@ auto make_tensor( D &&desc,
                   matxMemorySpace_t space = MATX_MANAGED_MEMORY,
                   cudaStream_t stream = 0) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  MATX_LOG_DEBUG("make_tensor<T,D>(desc, space, stream): space={}, stream={}", 
+                 static_cast<int>(space), reinterpret_cast<void*>(stream));
 
   using Dstrip = typename remove_cvref<D>::type;
 
@@ -570,6 +673,9 @@ auto make_tensor( TensorType &&tensor,
                   matxMemorySpace_t space = MATX_MANAGED_MEMORY,
                   cudaStream_t stream = 0) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  MATX_LOG_DEBUG("make_tensor(tensor&&, desc, space, stream): space={}, stream={}", 
+                 static_cast<int>(space), reinterpret_cast<void*>(stream));
 
   auto tmp = make_tensor<typename TensorType::value_type, typename TensorType::desc_type>(std::forward<typename TensorType::desc_type>(desc), space, stream);
   tensor.Shallow(tmp);
@@ -594,6 +700,18 @@ auto make_tensor( T *const data,
                   const index_t (&strides)[RANK],
                   bool owning = false) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  std::string shape_str = "[";
+  std::string strides_str = "[";
+  for (int i = 0; i < RANK; i++) {
+    if (i > 0) { shape_str += ","; strides_str += ","; }
+    shape_str += std::to_string(shape[i]);
+    strides_str += std::to_string(strides[i]);
+  }
+  shape_str += "]";
+  strides_str += "]";
+  MATX_LOG_DEBUG("make_tensor<T,RANK>(data, shape, strides, owning): ptr={}, shape={}, strides={}, owning={}", 
+                 reinterpret_cast<const void*>(data), shape_str, strides_str, owning);
 
   DefaultDescriptor<RANK>  desc{shape, strides};
   auto storage = owning ? make_owning_storage<T>(desc.TotalSize()) : make_non_owning_storage<T>(data, desc.TotalSize());
@@ -620,6 +738,18 @@ auto make_tensor( TensorType &tensor,
                   const index_t (&shape)[TensorType::Rank()],
                   const index_t (&strides)[TensorType::Rank()]) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  std::string shape_str = "[";
+  std::string strides_str = "[";
+  for (int i = 0; i < TensorType::Rank(); i++) {
+    if (i > 0) { shape_str += ","; strides_str += ","; }
+    shape_str += std::to_string(shape[i]);
+    strides_str += std::to_string(strides[i]);
+  }
+  shape_str += "]";
+  strides_str += "]";
+  MATX_LOG_DEBUG("make_tensor(tensor&, data, shape, strides): ptr={}, shape={}, strides={}", 
+                 reinterpret_cast<const void*>(data), shape_str, strides_str);
 
   auto tmp = make_tensor<typename TensorType::value_type, TensorType::Rank()>(data, shape, strides, false);
   tensor.Shallow(tmp);
@@ -633,6 +763,8 @@ auto make_tensor( TensorType &tensor,
 template <typename T, index_t I, index_t ...Is>
 auto make_static_tensor() {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  MATX_LOG_DEBUG("make_static_tensor<T,I,Is...>()");
 
   static_tensor_desc_t<I, Is...> desc{};
   auto storage = make_owning_storage<T>(desc.TotalSize());
@@ -644,6 +776,8 @@ template <typename TensorType,
 auto make_tensor( TensorType &tensor,
                   const DLManagedTensor dlp_tensor) {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)
+  
+  MATX_LOG_DEBUG("make_tensor(tensor&, DLManagedTensor): ptr={}", dlp_tensor.dl_tensor.data);
 
   using T = typename TensorType::value_type;
   const DLTensor &dt = dlp_tensor.dl_tensor;
diff --git a/include/matx/executors/cuda.h b/include/matx/executors/cuda.h
index 82757d82..5cb15b6b 100644
--- a/include/matx/executors/cuda.h
+++ b/include/matx/executors/cuda.h
@@ -38,6 +38,7 @@
 #include "matx/core/get_grid_dims.h"
 #include "matx/executors/kernel.h"
 #include "matx/executors/cuda_executor_common.h"
+#include "matx/core/log.h"
 #include <cuda/std/array>
 #include <utility>
 #include <vector>
@@ -269,6 +270,9 @@ namespace matx
             // Helper lambda to launch kernel
             auto launch_kernel = [&]<detail::ElementsPerThread EPT>() {
               dispatch_kernel.template operator()<EPT>([&](auto launch_func) {
+                MATX_LOG_DEBUG("Launching CUDA kernel: rank={}, blocks=({},{},{}), threads=({},{},{}), EPT={}, stream={}", 
+                               Op::Rank(), blocks.x, blocks.y, blocks.z, threads.x, threads.y, threads.z, 
+                               static_cast<int>(EPT), reinterpret_cast<void*>(stream_));
                 launch_func();
               });
             };
diff --git a/include/matx/generators/alternate.h b/include/matx/generators/alternate.h
index bf41ba78..9d185cb3 100644
--- a/include/matx/generators/alternate.h
+++ b/include/matx/generators/alternate.h
@@ -33,6 +33,7 @@
 #pragma once
 
 #include "matx/generators/generator1d.h"
+#include "matx/core/log.h"
 
 namespace matx
 {
@@ -47,7 +48,11 @@ namespace matx
         using matxop = bool;        
 
 	      __MATX_INLINE__ std::string str() const { return "alternate"; }
-        __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ Alternating(index_t size) : size_(size) {};
+        __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ Alternating(index_t size) : size_(size) {
+#ifndef __CUDA_ARCH__
+          MATX_LOG_TRACE("Alternating constructor: size={}", size);
+#endif
+        };
 
         template <OperatorCapability Cap, typename InType>
         __MATX_INLINE__ __MATX_HOST__ auto get_capability([[maybe_unused]] InType &in) const {
diff --git a/include/matx/generators/bartlett.h b/include/matx/generators/bartlett.h
index 3351c9d9..d72d195e 100644
--- a/include/matx/generators/bartlett.h
+++ b/include/matx/generators/bartlett.h
@@ -33,6 +33,7 @@
 #pragma once
 
 #include "matx/generators/generator1d.h"
+#include "matx/core/log.h"
 #include <type_traits>
 
 namespace matx
@@ -48,7 +49,11 @@ namespace matx
 
         __MATX_INLINE__ std::string str() const { return "bartlett"; }
 
-        inline __MATX_HOST__ __MATX_DEVICE__ Bartlett(index_t size) : size_(size){};
+        inline __MATX_HOST__ __MATX_DEVICE__ Bartlett(index_t size) : size_(size){
+#ifndef __CUDA_ARCH__  // host compilation pass only: the log call is left out of device code
+          MATX_LOG_TRACE("Bartlett constructor: size={}", size);
+#endif  // !__CUDA_ARCH__
+        }
 
         template <typename CapType>
         inline __MATX_HOST__ __MATX_DEVICE__ auto operator()(index_t i) const
diff --git a/include/matx/generators/blackman.h b/include/matx/generators/blackman.h
index dc4aad33..27b9b1fa 100644
--- a/include/matx/generators/blackman.h
+++ b/include/matx/generators/blackman.h
@@ -33,6 +33,7 @@
 #pragma once
 
 #include "matx/generators/generator1d.h"
+#include "matx/core/log.h"
 
 namespace matx
 {
@@ -47,7 +48,11 @@ namespace matx
 
         __MATX_INLINE__ std::string str() const { return "blackman"; }
 
-        __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ Blackman(index_t size) : size_(size){};
+        __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ Blackman(index_t size) : size_(size){
+#ifndef __CUDA_ARCH__  // host compilation pass only: the log call is left out of device code
+          MATX_LOG_TRACE("Blackman constructor: size={}", size);
+#endif  // !__CUDA_ARCH__
+        }
 
         template <typename CapType>
         __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ auto operator()(index_t i) const
diff --git a/include/matx/generators/chirp.h b/include/matx/generators/chirp.h
index 768fa4e7..06f021e8 100644
--- a/include/matx/generators/chirp.h
+++ b/include/matx/generators/chirp.h
@@ -33,6 +33,7 @@
 #pragma once
 
 #include "matx/generators/linspace.h"
+#include "matx/core/log.h"
 
 namespace matx
 {
@@ -70,7 +71,11 @@ namespace matx
           f1_(f1),          
           t1_(t1),
           method_(method)
-        {}
+        {
+#ifndef __CUDA_ARCH__
+          MATX_LOG_TRACE("Chirp constructor: f0={}, f1={}, t1={}", f0, f1, t1);
+#endif
+        }
 
         template <OperatorCapability Cap, typename InType>
         __MATX_INLINE__ __MATX_HOST__ auto get_capability([[maybe_unused]] InType &in) const {
@@ -131,7 +136,11 @@ namespace matx
           f1_(f1),
           t1_(t1),          
           method_(method)
-        {}
+        {
+#ifndef __CUDA_ARCH__
+          MATX_LOG_TRACE("ComplexChirp constructor: f0={}, f1={}, t1={}", f0, f1, t1);
+#endif
+        }
 
         template <OperatorCapability Cap, typename InType>
         __MATX_INLINE__ __MATX_HOST__ auto get_capability([[maybe_unused]] InType &in) const {
diff --git a/include/matx/generators/diag.h b/include/matx/generators/diag.h
index 83600a53..b3dece46 100644
--- a/include/matx/generators/diag.h
+++ b/include/matx/generators/diag.h
@@ -32,6 +32,7 @@
 
 #pragma once
 
+#include "matx/core/log.h"
 
 namespace matx
 {
@@ -56,6 +57,7 @@ namespace matx
         if constexpr (!is_noshape_v<ShapeType>) {
           static_assert(Rank() > 1, "Diagonal generator must be used with an operator of rank 1 or higher");
         }
+        MATX_LOG_TRACE("Diag constructor: rank={}, val={}", Rank(), val);
       };
 
       template <OperatorCapability Cap, typename InType>
diff --git a/include/matx/generators/fftfreq.h b/include/matx/generators/fftfreq.h
index d9d59519..d60e9dd4 100644
--- a/include/matx/generators/fftfreq.h
+++ b/include/matx/generators/fftfreq.h
@@ -33,6 +33,7 @@
 #pragma once
 
 #include "matx/generators/generator1d.h"
+#include "matx/core/log.h"
 
 namespace matx
 {
@@ -52,6 +53,7 @@ namespace matx
         {
           n_ = n;
           d_ = d;
+          MATX_LOG_TRACE("FFTFreqOp constructor: n={}, d={}", n, d);
         }
 
         template <typename CapType>
diff --git a/include/matx/generators/flattop.h b/include/matx/generators/flattop.h
index 62943b7f..295fb9a3 100644
--- a/include/matx/generators/flattop.h
+++ b/include/matx/generators/flattop.h
@@ -33,6 +33,7 @@
 #pragma once
 
 #include "matx/generators/generator1d.h"
+#include "matx/core/log.h"
 #include <type_traits>
 
 namespace matx
@@ -54,7 +55,11 @@ namespace matx
 
         __MATX_INLINE__ std::string str() const { return "flattop"; }
 
-        inline __MATX_HOST__ __MATX_DEVICE__ FlatTop(index_t size) : size_(size){};
+        inline __MATX_HOST__ __MATX_DEVICE__ FlatTop(index_t size) : size_(size){
+#ifndef __CUDA_ARCH__  // host compilation pass only: the log call is left out of device code
+          MATX_LOG_TRACE("FlatTop constructor: size={}", size);
+#endif  // !__CUDA_ARCH__
+        }
 
         template <typename CapType>
         inline __MATX_HOST__ __MATX_DEVICE__ auto operator()(index_t i) const
diff --git a/include/matx/generators/hamming.h b/include/matx/generators/hamming.h
index 6a032a14..da7ef297 100644
--- a/include/matx/generators/hamming.h
+++ b/include/matx/generators/hamming.h
@@ -33,6 +33,7 @@
 #pragma once
 
 #include "matx/generators/generator1d.h"
+#include "matx/core/log.h"
 #include <type_traits>
 
 namespace matx
@@ -48,7 +49,11 @@ namespace matx
 
         __MATX_INLINE__ std::string str() const { return "hamming"; }
 	
-        inline __MATX_HOST__ __MATX_DEVICE__ Hamming(index_t size) : size_(size){};
+        inline __MATX_HOST__ __MATX_DEVICE__ Hamming(index_t size) : size_(size){
+#ifndef __CUDA_ARCH__  // host compilation pass only: the log call is left out of device code
+          MATX_LOG_TRACE("Hamming constructor: size={}", size);
+#endif  // !__CUDA_ARCH__
+        }
 
         template <typename CapType>
         inline __MATX_HOST__ __MATX_DEVICE__ auto operator()(index_t i) const 
diff --git a/include/matx/generators/hanning.h b/include/matx/generators/hanning.h
index d40a85d0..2a291d0d 100644
--- a/include/matx/generators/hanning.h
+++ b/include/matx/generators/hanning.h
@@ -33,6 +33,7 @@
 #pragma once
 
 #include "matx/generators/generator1d.h"
+#include "matx/core/log.h"
 #include <type_traits>
 
 namespace matx
@@ -48,7 +49,11 @@ namespace matx
 
         __MATX_INLINE__ std::string str() const { return "hanning"; }
 
-        inline __MATX_HOST__ __MATX_DEVICE__ Hanning(index_t size) : size_(size){};
+        inline __MATX_HOST__ __MATX_DEVICE__ Hanning(index_t size) : size_(size){
+#ifndef __CUDA_ARCH__  // host compilation pass only: the log call is left out of device code
+          MATX_LOG_TRACE("Hanning constructor: size={}", size);
+#endif  // !__CUDA_ARCH__
+        }
 
         template <typename CapType>
         inline __MATX_HOST__ __MATX_DEVICE__ auto operator()(index_t i) const
diff --git a/include/matx/generators/linspace.h b/include/matx/generators/linspace.h
index bd9dbadb..966e9a71 100644
--- a/include/matx/generators/linspace.h
+++ b/include/matx/generators/linspace.h
@@ -33,6 +33,7 @@
 #pragma once
 
 #include "matx/generators/range.h"
+#include "matx/core/log.h"
 
 namespace matx
 {
@@ -60,6 +61,7 @@ namespace matx
 
         inline LinspaceOp(const T (&firsts)[NUM_RC], const T (&lasts)[NUM_RC], index_t count, int axis) 
         {
+          MATX_LOG_TRACE("LinspaceOp constructor: NUM_RC={}, count={}, axis={}", NUM_RC, count, axis);
           axis_ = axis;
           count_ = count;
           for (int i = 0; i < NUM_RC; ++i) {
diff --git a/include/matx/generators/logspace.h b/include/matx/generators/logspace.h
index 45cfc2b5..12a0862b 100644
--- a/include/matx/generators/logspace.h
+++ b/include/matx/generators/logspace.h
@@ -33,6 +33,7 @@
 #pragma once
 
 #include "matx/generators/generator1d.h"
+#include "matx/core/log.h"
 #include <type_traits>
 
 namespace matx
@@ -67,6 +68,7 @@ namespace matx
           else {
             range_ = Range<T>{first, (last - first) / static_cast<T>(count - 1)};
           }
+          MATX_LOG_TRACE("Logspace constructor: first={}, last={}, count={}", first, last, count);
 #endif
         }
 
diff --git a/include/matx/generators/meshgrid.h b/include/matx/generators/meshgrid.h
index 67e1ba1e..9453e913 100644
--- a/include/matx/generators/meshgrid.h
+++ b/include/matx/generators/meshgrid.h
@@ -32,6 +32,7 @@
 
 #pragma once
 #include "matx/operators/permute.h"
+#include "matx/core/log.h"
 
 namespace matx
 {
@@ -55,6 +56,7 @@ namespace matx
           __MATX_INLINE__ MeshGridOp(T1 t1, cuda::std::array<index_t, RANK> shape, int idx) : t1_(t1), shape_(shape), idx_(idx) {
             static_assert(shape.size() == RANK );
             static_assert(is_matx_op<T1>());
+            MATX_LOG_TRACE("MeshGridOp constructor: rank={}, axis={}, idx={}", RANK, AXIS, idx);  // runtime log, placed after the compile-time static_asserts
           }
 
 
diff --git a/include/matx/generators/random.h b/include/matx/generators/random.h
index d8389b2e..03e1be8d 100644
--- a/include/matx/generators/random.h
+++ b/include/matx/generators/random.h
@@ -33,6 +33,7 @@
 #pragma once
 
 #include "matx/core/error.h"
+#include "matx/core/log.h"
 #include <cuda/std/complex>
 #include <curand_kernel.h>
 #include <type_traits>
@@ -279,6 +280,8 @@ namespace detail {
         for (int i = RANK - 2; i >= 0; i--) {
           strides_[i] = strides_[i+1] * s[i+1];
         }
+
+        MATX_LOG_TRACE("RandomOp constructor: rank={}, total_size={}, seed={}", RANK, total_size_, seed);  // emitted after the strides_ loop above has run
       }
 
       template <OperatorCapability Cap, typename InType>
diff --git a/include/matx/generators/range.h b/include/matx/generators/range.h
index 01d817eb..bcd21a61 100644
--- a/include/matx/generators/range.h
+++ b/include/matx/generators/range.h
@@ -33,6 +33,7 @@
 #pragma once
 
 #include "matx/generators/generator1d.h"
+#include "matx/core/log.h"
 #include <type_traits>
 
 namespace matx
@@ -51,7 +52,9 @@ namespace matx
 
         __MATX_INLINE__ std::string str() const { return "range"; }
 
-        Range(T first, T step) : first_(first), step_(step) {}
+        Range(T first, T step) : first_(first), step_(step) {
+          MATX_LOG_TRACE("Range constructor: first={}, step={}", first, step);  // NOTE(review): needs T to be formattable by the logger; confirm for every T used with Range
+        }
 
         template <typename CapType>
         __MATX_DEVICE__ __MATX_HOST__ __MATX_INLINE__ auto operator()(index_t idx) const
diff --git a/include/matx/operators/all.h b/include/matx/operators/all.h
index 9490d772..04cbdb65 100644
--- a/include/matx/operators/all.h
+++ b/include/matx/operators/all.h
@@ -60,6 +60,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "all(" + get_type_str(a_) + ")"; }
       __MATX_INLINE__ AllOp(const OpA &a) : a_(a) { 
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
         for (int r = 0; r < ORank; r++) {
           out_dims_[r] = a_.Size(r);
         }
diff --git a/include/matx/operators/ambgfun.h b/include/matx/operators/ambgfun.h
index 0ca5559d..43add18f 100644
--- a/include/matx/operators/ambgfun.h
+++ b/include/matx/operators/ambgfun.h
@@ -74,7 +74,7 @@ namespace matx
 
         __MATX_INLINE__ AmbgFunOp(const OpX &x, const OpY &y, double fs, AMBGFunCutType_t cut, float cut_val) : 
               x_(x), y_(y), fs_(fs), cut_(cut), cut_val_(cut_val) {
-          
+          MATX_LOG_TRACE("{} constructor: fs={}, cut={}", str(), fs, static_cast<int>(cut));
           static_assert(OpX::Rank() == 1, "Input to ambgfun must be rank 1");                
           if (cut == AMBGFUN_CUT_TYPE_2D) {
             out_dims_[0] = 2 * x_.Size(0) - 1;
diff --git a/include/matx/operators/any.h b/include/matx/operators/any.h
index 9cac0f81..3183b08a 100644
--- a/include/matx/operators/any.h
+++ b/include/matx/operators/any.h
@@ -60,6 +60,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "any(" + get_type_str(a_) + ")"; }
       __MATX_INLINE__ AnyOp(const OpA &a) : a_(a) { 
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
         for (int r = 0; r < ORank; r++) {
           out_dims_[r] = a_.Size(r);
         }        
diff --git a/include/matx/operators/apply.h b/include/matx/operators/apply.h
index 2579f8f0..05e8555c 100644
--- a/include/matx/operators/apply.h
+++ b/include/matx/operators/apply.h
@@ -59,6 +59,7 @@ namespace matx
 
         __MATX_INLINE__ ApplyOp(Func func, const Ops&... ops) : func_(func), ops_(detail::base_type_t<Ops>(ops)...)
         {
+          MATX_LOG_TRACE("{} constructor: num_ops={}", str(), sizeof...(Ops));
           static_assert(sizeof...(Ops) > 0, "ApplyOp requires at least one input operator");
           
           // Initialize sizes from the first operator
diff --git a/include/matx/operators/apply_idx.h b/include/matx/operators/apply_idx.h
index 13adc09c..9a8d09c3 100644
--- a/include/matx/operators/apply_idx.h
+++ b/include/matx/operators/apply_idx.h
@@ -63,6 +63,7 @@ namespace matx
 
         __MATX_INLINE__ ApplyIdxOp(Func func, const Ops&... ops) : func_(func), ops_(detail::base_type_t<Ops>(ops)...)
         {
+          MATX_LOG_TRACE("{} constructor: num_ops={}", str(), sizeof...(Ops));
           static_assert(sizeof...(Ops) > 0, "ApplyIdxOp requires at least one input operator");
           
           // Initialize sizes from the first operator
diff --git a/include/matx/operators/argmax.h b/include/matx/operators/argmax.h
index 82f0d201..57ff01c6 100644
--- a/include/matx/operators/argmax.h
+++ b/include/matx/operators/argmax.h
@@ -57,7 +57,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "argmax(" + get_type_str(a_) + ")"; }
       __MATX_INLINE__ ArgMaxOp(const OpA &a) : a_(a) { 
-     
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());  // NOTE(review): str() builds a std::string; confirm it is not evaluated when trace logging is off
       };
 
       template <typename... Is>
diff --git a/include/matx/operators/argmin.h b/include/matx/operators/argmin.h
index b8a20370..5bdce769 100644
--- a/include/matx/operators/argmin.h
+++ b/include/matx/operators/argmin.h
@@ -56,7 +56,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "argmin(" + get_type_str(a_) + ")"; }
       __MATX_INLINE__ ArgMinOp(const OpA &a) : a_(a) {
-     
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());  // NOTE(review): str() builds a std::string; confirm it is not evaluated when trace logging is off
       };
 
       template <typename... Is>
diff --git a/include/matx/operators/argminmax.h b/include/matx/operators/argminmax.h
index 4f395cb9..8a978d06 100644
--- a/include/matx/operators/argminmax.h
+++ b/include/matx/operators/argminmax.h
@@ -57,7 +57,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "argminmax(" + get_type_str(a_) + ")"; }
       __MATX_INLINE__ ArgMinMaxOp(const OpA &a) : a_(a) {
-     
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());  // NOTE(review): str() builds a std::string; confirm it is not evaluated when trace logging is off
       };
 
       template <typename... Is>
diff --git a/include/matx/operators/argsort.h b/include/matx/operators/argsort.h
index a733a2da..98d3020b 100644
--- a/include/matx/operators/argsort.h
+++ b/include/matx/operators/argsort.h
@@ -61,6 +61,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "argsort()"; }
       __MATX_INLINE__ ArgsortOp(const OpA &a, const SortDirection_t dir) : a_(a), dir_(dir) { 
+        MATX_LOG_TRACE("{} constructor: rank={}, dir={}", str(), Rank(), static_cast<int>(dir));
         for (int r = 0; r < Rank(); r++) {
           out_dims_[r] = a_.Size(r);
         }
diff --git a/include/matx/operators/at.h b/include/matx/operators/at.h
index 4ee00479..2522414a 100644
--- a/include/matx/operators/at.h
+++ b/include/matx/operators/at.h
@@ -53,7 +53,9 @@ namespace matx
         using value_type = typename Op::value_type;
 
         __MATX_INLINE__ std::string str() const { return "at()"; }
-        __MATX_INLINE__ AtOp(const Op &op, Is... is) : op_(op), idx_{is...} {};
+        __MATX_INLINE__ AtOp(const Op &op, Is... is) : op_(op), idx_{is...} {
+          MATX_LOG_TRACE("{} constructor: num_indices={}", str(), sizeof...(Is));  // sizeof...(Is) is the number of index arguments
+        }
 
         template <typename CapType, typename... Is2>
         __MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) operator()([[maybe_unused]] Is2... indices) const
diff --git a/include/matx/operators/base_operator.h b/include/matx/operators/base_operator.h
index 393c3e76..5a4e30e6 100644
--- a/include/matx/operators/base_operator.h
+++ b/include/matx/operators/base_operator.h
@@ -38,6 +38,7 @@
 #include "matx/core/operator_utils.h"
 #include "matx/core/capabilities.h"
 #include "matx/core/error.h"
+#include "matx/core/log.h"
 
 namespace matx
 {
diff --git a/include/matx/operators/binary_operators.h b/include/matx/operators/binary_operators.h
index 1aec8d31..21343d93 100644
--- a/include/matx/operators/binary_operators.h
+++ b/include/matx/operators/binary_operators.h
@@ -123,6 +123,7 @@ namespace matx
 
       __MATX_INLINE__ matxBinaryOp(const I1 &in1, const I2 &in2, const Op &op) : in1_(in1), in2_(in2), op_(op)
       {
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
         if constexpr (Rank() > 0)
         {
           MATX_ASSERT_COMPATIBLE_OP_SIZES(in1_);
diff --git a/include/matx/operators/cart2sph.h b/include/matx/operators/cart2sph.h
index ebbf45c3..470832b9 100644
--- a/include/matx/operators/cart2sph.h
+++ b/include/matx/operators/cart2sph.h
@@ -59,6 +59,7 @@ namespace matx
 
         __MATX_INLINE__ Cart2SphOp(const T1 &x, const T2 &y, const T3 &z) : x_(x), y_(y), z_(z)
       {
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
         MATX_ASSERT_COMPATIBLE_OP_SIZES(x);
         MATX_ASSERT_COMPATIBLE_OP_SIZES(y);
         MATX_ASSERT_COMPATIBLE_OP_SIZES(z);
diff --git a/include/matx/operators/cast.h b/include/matx/operators/cast.h
index 430f5ae4..aba8047c 100644
--- a/include/matx/operators/cast.h
+++ b/include/matx/operators/cast.h
@@ -71,7 +71,9 @@ namespace matx
         using value_type = NewType;
 
 	      __MATX_INLINE__ std::string str() const { return as_type_str<NewType>() + "(" + op_.str() + ")"; }
-        __MATX_INLINE__ CastOp(const T &op) : op_(op){};
+        __MATX_INLINE__ CastOp(const T &op) : op_(op){
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());  // NOTE(review): str() calls op_.str(); confirm it is not evaluated when trace logging is off
+        }
 
         template <typename CapType, typename... Is>
         __MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) operator()(Is... indices) const
@@ -137,6 +139,7 @@ namespace matx
 
 	      __MATX_INLINE__ std::string str() const { return as_type_str<NewType>() + "(" + real_op_.str() + "," + imag_op_.str() + ")"; }
         __MATX_INLINE__ ComplexCastOp(T1 real_op, T2 imag_op) : real_op_(real_op), imag_op_(imag_op) {
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
           static_assert(detail::get_rank<T1>() == detail::get_rank<T2>(), "rank of real and imaginary operators must match");
           if (real_op_.Shape() != imag_op_.Shape()) {
             MATX_THROW(matxInvalidSize, "ComplexCastOp: sizes of input operators must match in all dimensions");
diff --git a/include/matx/operators/cgsolve.h b/include/matx/operators/cgsolve.h
index 91b109bc..a42c6153 100644
--- a/include/matx/operators/cgsolve.h
+++ b/include/matx/operators/cgsolve.h
@@ -65,7 +65,7 @@ namespace matx
 
         __MATX_INLINE__ CGSolveOp(const OpA &A, const OpB &B, double tol, int max_iters) : 
               a_(A), b_(B), tol_(tol), max_iters_(max_iters) {
-          
+          MATX_LOG_TRACE("{} constructor: tol={}, max_iters={}", str(), tol, max_iters);
           for (int r = 0; r < Rank(); r++) {
             out_dims_[r] = b_.Size(r);
           }
diff --git a/include/matx/operators/channelize_poly.h b/include/matx/operators/channelize_poly.h
index 40e71ebd..b6628836 100644
--- a/include/matx/operators/channelize_poly.h
+++ b/include/matx/operators/channelize_poly.h
@@ -68,7 +68,8 @@ namespace detail {
       __MATX_INLINE__ std::string str() const { return "channelize_poly(" + get_type_str(a_) + "," + get_type_str(f_) + ")";}
       __MATX_INLINE__ ChannelizePolyOp(const OpA &a, const FilterType &f, index_t num_channels, index_t decimation_factor) :
           a_(a), f_(f), num_channels_(num_channels), decimation_factor_(decimation_factor)
-      { 
+      {
+        MATX_LOG_TRACE("{} constructor: num_channels={}, decimation_factor={}", str(), num_channels, decimation_factor);  // logs the two runtime arguments as passed in
         const index_t b_len = (a_.Size(OpA::Rank() - 1) + num_channels - 1) / num_channels;
 
         for (int r = 0; r < OpA::Rank()-1; r++) {
diff --git a/include/matx/operators/chol.h b/include/matx/operators/chol.h
index 8c57f8c9..cb7655ed 100644
--- a/include/matx/operators/chol.h
+++ b/include/matx/operators/chol.h
@@ -61,7 +61,9 @@ namespace detail {
       using can_alias = bool; // Chol is allowed to use the same input/output memory
 
       __MATX_INLINE__ std::string str() const { return "chol()"; }
-      __MATX_INLINE__ CholOp(const OpA &a, SolverFillMode uplo) : a_(a), uplo_(uplo) { }
+      __MATX_INLINE__ CholOp(const OpA &a, SolverFillMode uplo) : a_(a), uplo_(uplo) {
+        MATX_LOG_TRACE("{} constructor: uplo={}", str(), static_cast<int>(uplo));  // str() is the fixed label "chol()"; uplo is logged as an int
+      }
 
       // This should never be called
       template <typename... Is>
diff --git a/include/matx/operators/clone.h b/include/matx/operators/clone.h
index ed29e950..33940579 100644
--- a/include/matx/operators/clone.h
+++ b/include/matx/operators/clone.h
@@ -83,7 +83,7 @@ MATX_IGNORE_WARNING_POP_GCC
             }
           }
           MATX_ASSERT(d == T::Rank(), matxInvalidDim);
-
+          MATX_LOG_TRACE("{} constructor: input_rank={}, output_rank={}", str(), T::Rank(), CRank);
         }
 
         template <typename CapType, typename Op, typename Dims, typename... Is>
diff --git a/include/matx/operators/collapse.h b/include/matx/operators/collapse.h
index c3afc062..8e3d153d 100644
--- a/include/matx/operators/collapse.h
+++ b/include/matx/operators/collapse.h
@@ -56,6 +56,7 @@ namespace matx
         {
           static_assert(DIM <= T1::Rank(),  "Collapse DIM must be less than or equal to Rank() of operator");
           static_assert(DIM > 1, "Must collapse multiple dims");
+          MATX_LOG_TRACE("{} constructor: input_rank={}, output_rank={}", str(), T1::Rank(), T1::Rank() - DIM + 1);
           static_assert(T1::Rank() >= 2, "Collapse must be called on operators with rank >= 2");
 
           // compute size of collapsed dimension
diff --git a/include/matx/operators/comma.h b/include/matx/operators/comma.h
index 7823cee3..005c5b1e 100644
--- a/include/matx/operators/comma.h
+++ b/include/matx/operators/comma.h
@@ -49,6 +49,7 @@ namespace matx
       class CommaOp : public BaseOp<CommaOp<Op1, Op2>>{
         public:
           __MATX_HOST__ __MATX_INLINE__  CommaOp(const Op1 &op1, const Op2 &op2) : op1_(op1), op2_(op2) {
+            MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
             MATX_STATIC_ASSERT_STR(Op1::Rank() == Op2::Rank(), matxInvalidSize, 
                 "Chained expressions using the comma operator must match in rank");
             if constexpr ( Rank() > 0) {
diff --git a/include/matx/operators/concat.h b/include/matx/operators/concat.h
index f4a35a2e..3f7d3c79 100644
--- a/include/matx/operators/concat.h
+++ b/include/matx/operators/concat.h
@@ -79,6 +79,7 @@ namespace matx
         static_assert(RANK > 0, "Cannot concatenate rank-0 tensors");
         static_assert(sizeof...(Ts) > 1, "Must have more than one tensor to concatenate");
         static_assert((... && (RANK == Ts::Rank())), "concatenated ops must have the same rank");
+        MATX_LOG_TRACE("{} constructor: rank={}, axis={}, num_tensors={}", str(), RANK, axis, sizeof...(Ts));
 
         for (int32_t i = 0; i < RANK; i++) {
           if(i == axis_) {
diff --git a/include/matx/operators/conv.h b/include/matx/operators/conv.h
index f40bc64e..de96de60 100644
--- a/include/matx/operators/conv.h
+++ b/include/matx/operators/conv.h
@@ -75,7 +75,7 @@ namespace matx
 
         __MATX_INLINE__ Conv1DOp(const OpA &A, const OpB &B, matxConvCorrMode_t mode, matxConvCorrMethod_t method, PermDims perm) : 
               a_(A), b_(B), mode_(mode), method_(method), perm_(perm) {
-
+          MATX_LOG_TRACE("{} constructor: mode={}, method={}", str(), static_cast<int>(mode), static_cast<int>(method));
           MATX_ASSERT_STR((!is_matx_type_v<typename OpA::value_type> && !is_matx_type_v<typename OpB::value_type>) || 
                           method == MATX_C_METHOD_DIRECT, 
             matxInvalidType, "FFT convolutions do not support half precision float currently");
@@ -304,7 +304,7 @@ namespace detail {
 
       __MATX_INLINE__ Conv2DOp(const OpA &A, const OpB &B, matxConvCorrMode_t mode, PermDims perm) : 
             a_(A), b_(B), mode_(mode), perm_(perm) {
-
+        MATX_LOG_TRACE("{} constructor: mode={}", str(), static_cast<int>(mode));
         // Currently when using the axis parameter the rank of inputs must be equal
         if constexpr (!std::is_same_v<PermDims, no_permute_t>) {
           for (int r = 0; r < Rank(); r++) {
diff --git a/include/matx/operators/corr.h b/include/matx/operators/corr.h
index 29e9a73e..7d72e63a 100644
--- a/include/matx/operators/corr.h
+++ b/include/matx/operators/corr.h
@@ -71,7 +71,7 @@ namespace matx
 
         __MATX_INLINE__ CorrOp(const OpA &A, const OpB &B, matxConvCorrMode_t mode, [[maybe_unused]] matxConvCorrMethod_t method, PermDims perm) : 
               a_(A), b_(B), mode_(mode), method_(method), perm_(perm) {
-
+          MATX_LOG_TRACE("{} constructor: mode={}, method={}", str(), static_cast<int>(mode), static_cast<int>(method));
           // Currently when using the axis parameter the rank of inputs must be equal
           if constexpr (!std::is_same_v<PermDims, no_permute_t>) {
             for (int r = 0; r < Rank(); r++) {
diff --git a/include/matx/operators/cov.h b/include/matx/operators/cov.h
index bb3f1441..4f16c6ea 100644
--- a/include/matx/operators/cov.h
+++ b/include/matx/operators/cov.h
@@ -62,7 +62,7 @@ namespace matx
 
         __MATX_INLINE__ CovOp(const OpA &A) : 
               a_(A) {
-          
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
           for (int r = 0; r < Rank(); r++) {
             out_dims_[r] = a_.Size(r);
           }
diff --git a/include/matx/operators/cross.h b/include/matx/operators/cross.h
index 65033ce5..a4f5c5f0 100644
--- a/include/matx/operators/cross.h
+++ b/include/matx/operators/cross.h
@@ -66,6 +66,7 @@ namespace matx
 
         __MATX_INLINE__ std::string str() const { return "cross()"; }
         __MATX_INLINE__ CrossOp(const OpA &A, const OpB &B) : a_(A), b_(B) {
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
           MATX_STATIC_ASSERT_STR(OpA::Rank() >= 1 && OpB::Rank() >= 1, matxInvalidDim, "Operators to cross() must have rank GTE one.");
 
           //dims other than the last are batched, so count R-->L, beginning one-left of the right-most dim
diff --git a/include/matx/operators/cumsum.h b/include/matx/operators/cumsum.h
index 1e3d70dc..0d82d6a4 100644
--- a/include/matx/operators/cumsum.h
+++ b/include/matx/operators/cumsum.h
@@ -64,6 +64,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "cumsum()"; }
       __MATX_INLINE__ CumSumOp(const OpA &a) : a_(a) { 
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
         for (int r = 0; r < Rank(); r++) {
           out_dims_[r] = a_.Size(r);
         }
diff --git a/include/matx/operators/dct.h b/include/matx/operators/dct.h
index 8a3ea10e..87d63c54 100644
--- a/include/matx/operators/dct.h
+++ b/include/matx/operators/dct.h
@@ -54,7 +54,9 @@ template <typename I, typename Out> class dctOp : public BaseOp<dctOp<I, Out>> {
   index_t N_;
 
 public:
-  dctOp(Out out, I in, index_t N) : out_(out), in_(in), N_(N) {}
+  dctOp(Out out, I in, index_t N) : out_(out), in_(in), N_(N) {
+    MATX_LOG_TRACE("dctOp constructor: N={}", N);  // the constructor has no device qualifier, unlike operator() below
+  }
 
   template <typename CapType>
   __MATX_DEVICE__ inline void operator()(index_t idx)
diff --git a/include/matx/operators/dense2sparse.h b/include/matx/operators/dense2sparse.h
index 8224dd09..ca0ae78e 100644
--- a/include/matx/operators/dense2sparse.h
+++ b/include/matx/operators/dense2sparse.h
@@ -51,7 +51,9 @@ class Dense2SparseOp : public BaseOp<Dense2SparseOp<OpA>> {
   using tosparse_xform_op = bool;
   using value_type = typename OpA::value_type;
 
-  __MATX_INLINE__ Dense2SparseOp(const OpA &a) : a_(a) {}
+  __MATX_INLINE__ Dense2SparseOp(const OpA &a) : a_(a) {
+    MATX_LOG_TRACE("{} constructor: rank={}", str(), OpA::Rank());  // str() is declared just below; it reads a_, which the initializer list has already set
+  }
 
   __MATX_INLINE__ std::string str() const {
     return "dense2sparse(" + get_type_str(a_) + ")";
diff --git a/include/matx/operators/det.h b/include/matx/operators/det.h
index 53f4e6ba..72724414 100644
--- a/include/matx/operators/det.h
+++ b/include/matx/operators/det.h
@@ -55,7 +55,9 @@ namespace detail {
       using det_xform_op = bool;
 
       __MATX_INLINE__ std::string str() const { return "det()"; }
-      __MATX_INLINE__ DetOp(const OpA &a) : a_(a) { }
+      __MATX_INLINE__ DetOp(const OpA &a) : a_(a) {
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());  // str() is the fixed label "det()"
+      }
 
       // This should never be called
       template <typename... Is>
diff --git a/include/matx/operators/diag.h b/include/matx/operators/diag.h
index dbff9964..2bbc966a 100644
--- a/include/matx/operators/diag.h
+++ b/include/matx/operators/diag.h
@@ -61,7 +61,9 @@ namespace matx
 
         __MATX_INLINE__ std::string str() const { return "diag(" + op_.str() + ")"; }
 
-        __MATX_INLINE__ DiagOp(const T1 &op, index_t k) : op_(op), k_(k) { }
+        __MATX_INLINE__ DiagOp(const T1 &op, index_t k) : op_(op), k_(k) {
+          MATX_LOG_TRACE("{} constructor: k={}", str(), k);  // NOTE(review): str() calls op_.str(); confirm it is not evaluated when trace logging is off
+        }
 
         template <typename CapType, typename... Is>
         __MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) operator()(Is... indices) const
diff --git a/include/matx/operators/eig.h b/include/matx/operators/eig.h
index 09e6ebb8..47ac1e1e 100644
--- a/include/matx/operators/eig.h
+++ b/include/matx/operators/eig.h
@@ -60,7 +60,9 @@ namespace detail {
       using eig_xform_op = bool;
 
       __MATX_INLINE__ std::string str() const { return "eig()"; }
-      __MATX_INLINE__ EigOp(const OpA &a, EigenMode jobz, SolverFillMode uplo) : a_(a), jobz_(jobz), uplo_(uplo) { };
+      __MATX_INLINE__ EigOp(const OpA &a, EigenMode jobz, SolverFillMode uplo) : a_(a), jobz_(jobz), uplo_(uplo) {
+        MATX_LOG_TRACE("{} constructor: jobz={}, uplo={}", str(), static_cast<int>(jobz), static_cast<int>(uplo));  // jobz and uplo are logged as ints
+      }
 
       // This should never be called
       template <typename... Is>
diff --git a/include/matx/operators/einsum.h b/include/matx/operators/einsum.h
index 0e476107..ad66de75 100644
--- a/include/matx/operators/einsum.h
+++ b/include/matx/operators/einsum.h
@@ -58,7 +58,9 @@ namespace detail {
       using einsum_xform_op = bool;
 
       __MATX_INLINE__ std::string str() const { return "einsum()"; }
-      __MATX_INLINE__ EinsumOp(const std::string &subscripts, const OpA&... ops) : subscripts_(subscripts), a_(ops...) { };
+      __MATX_INLINE__ EinsumOp(const std::string &subscripts, const OpA&... ops) : subscripts_(subscripts), a_(ops...) {
+        MATX_LOG_TRACE("{} constructor: subscripts=\"{}\"", str(), subscripts);  // the subscripts string is printed inside literal double quotes
+      }
 
       // This should never be called
       template <typename... Is>
diff --git a/include/matx/operators/fft.h b/include/matx/operators/fft.h
index 82e43ab6..06c4c7e4 100644
--- a/include/matx/operators/fft.h
+++ b/include/matx/operators/fft.h
@@ -105,6 +105,7 @@ namespace matx
 
         __MATX_INLINE__ FFTOp(const OpA &a, index_t size, PermDims perm, FFTNorm norm) : 
             a_(a), fft_size_(size),  perm_(perm), norm_(norm) {
+          MATX_LOG_TRACE("{} constructor: fft_size={}, norm={}", str(), size, static_cast<int>(norm));
           for (int r = 0; r < Rank(); r++) {
             out_dims_[r] = a_.Size(r);
           }
@@ -697,6 +698,7 @@ namespace matx
         }
 
         __MATX_INLINE__ FFT2Op(const OpA &a, PermDims perm, FFTNorm norm) : a_(a),  perm_(perm), norm_(norm) {
+          MATX_LOG_TRACE("{} constructor: norm={}", str(), static_cast<int>(norm));
           for (int r = 0; r < Rank(); r++) {
             out_dims_[r] = a_.Size(r);
           }
diff --git a/include/matx/operators/fftshift.h b/include/matx/operators/fftshift.h
index 031aae07..adf62a38 100644
--- a/include/matx/operators/fftshift.h
+++ b/include/matx/operators/fftshift.h
@@ -52,6 +52,7 @@ namespace matx
         __MATX_INLINE__ std::string str() const { return "fftshift(" + op_.str() + ")"; }
 
         __MATX_INLINE__ FFTShift1DOp(const T1 &op) : op_(op){
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
           static_assert(Rank() >= 1, "1D FFT shift must have a rank 1 operator or higher");
         };
 
@@ -169,6 +170,7 @@ namespace matx
         __MATX_INLINE__ std::string str() const { return "fftshift(" + op_.str() + ")"; }
 
         __MATX_INLINE__ FFTShift2DOp(const T1 &op) : op_(op){
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
           static_assert(Rank() >= 2, "2D FFT shift must have a rank 2 operator or higher");
         };
 
diff --git a/include/matx/operators/filter.h b/include/matx/operators/filter.h
index c282dfa2..2af830a3 100644
--- a/include/matx/operators/filter.h
+++ b/include/matx/operators/filter.h
@@ -65,6 +65,7 @@ namespace detail {
       }
       __MATX_INLINE__ FilterOp(const OpA &a, const cuda::std::array<FilterType, NR> h_rec,
             const cuda::std::array<FilterType, NNR> h_nonrec) : a_(a), h_rec_(h_rec), h_nonrec_(h_nonrec) { 
+        MATX_LOG_TRACE("{} constructor: rank={}, NR={}, NNR={}", str(), Rank(), NR, NNR);
         for (int r = 0; r < Rank(); r++) {
           out_dims_[r] = a_.Size(r);
         }              
diff --git a/include/matx/operators/find.h b/include/matx/operators/find.h
index 09be65d6..a51b171c 100644
--- a/include/matx/operators/find.h
+++ b/include/matx/operators/find.h
@@ -56,7 +56,9 @@ namespace detail {
       using find_xform_op = bool;
 
       __MATX_INLINE__ std::string str() const { return "find()"; }
-      __MATX_INLINE__ FindOp(const OpA &a, SelectType sel) : a_(a), sel_(sel) { };
+      __MATX_INLINE__ FindOp(const OpA &a, SelectType sel) : a_(a), sel_(sel) {
+        MATX_LOG_TRACE("{} constructor", str());
+      };
 
       // This should never be called
       template <typename... Is>
diff --git a/include/matx/operators/find_idx.h b/include/matx/operators/find_idx.h
index fedde45a..9827b1d7 100644
--- a/include/matx/operators/find_idx.h
+++ b/include/matx/operators/find_idx.h
@@ -56,7 +56,9 @@ namespace detail {
       using find_idx_xform_op = bool;
 
       __MATX_INLINE__ std::string str() const { return "find_idx()"; }
-      __MATX_INLINE__ FindIdxOp(const OpA &a, SelectType sel) : a_(a), sel_(sel) { };
+      __MATX_INLINE__ FindIdxOp(const OpA &a, SelectType sel) : a_(a), sel_(sel) {
+        MATX_LOG_TRACE("{} constructor", str());
+      };
 
       // This should never be called
       template <typename... Is>
diff --git a/include/matx/operators/find_peaks.h b/include/matx/operators/find_peaks.h
index a15d75f3..5a2d3754 100644
--- a/include/matx/operators/find_peaks.h
+++ b/include/matx/operators/find_peaks.h
@@ -61,6 +61,7 @@ namespace detail {
       __MATX_INLINE__ FindPeaksOp(const OpA &a, value_type height, 
                                                 value_type threshold) : 
                                                 a_(a), height_(height), threshold_(threshold) { 
+        MATX_LOG_TRACE("{} constructor: height={}, threshold={}", str(), height, threshold);
       }
 
       template <typename... Is>
diff --git a/include/matx/operators/flatten.h b/include/matx/operators/flatten.h
index e4b8e21a..f83f6125 100644
--- a/include/matx/operators/flatten.h
+++ b/include/matx/operators/flatten.h
@@ -55,6 +55,7 @@ namespace matx
         __MATX_INLINE__ FlattenOp(const T1 &op1) : op1_(op1)
         {
           static_assert(T1::Rank() > 1, "flatten has no effect on tensors of rank 0 and 1");
+          MATX_LOG_TRACE("{} constructor: input_rank={}, output_rank=1", str(), T1::Rank());
         }
 
         template <typename CapType, typename Is>
diff --git a/include/matx/operators/frexp.h b/include/matx/operators/frexp.h
index f1092a4e..58ce0c3e 100644
--- a/include/matx/operators/frexp.h
+++ b/include/matx/operators/frexp.h
@@ -51,6 +51,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "frexp()"; }
       __MATX_INLINE__ FrexpOp(const OpA &a) : a_(a) {
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
         static_assert(std::is_floating_point_v<value_type> ||
                       is_cuda_complex_v<value_type>, "frexp() must take a floating point input");
 
diff --git a/include/matx/operators/hermitian.h b/include/matx/operators/hermitian.h
index 4873e2c7..89db0f41 100644
--- a/include/matx/operators/hermitian.h
+++ b/include/matx/operators/hermitian.h
@@ -57,6 +57,7 @@ namespace matx
 
 	      __MATX_INLINE__ std::string str() const { return "hermitian(" + op_.str() + ")"; }
         __MATX_INLINE__ HermitianTransOp(const T1 &op) : op_(op) {
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
           static_assert(Rank() >= 2, "Hermitian operation needs input with rank >= 2");
         }
 
diff --git a/include/matx/operators/hist.h b/include/matx/operators/hist.h
index a9a6a888..5dbc22a9 100644
--- a/include/matx/operators/hist.h
+++ b/include/matx/operators/hist.h
@@ -64,6 +64,7 @@ namespace detail {
       __MATX_INLINE__ std::string str() const { return "hist()"; }
       __MATX_INLINE__ HistOp(const OpA &a, typename OpA::value_type lower, typename OpA::value_type upper, int num_levels) : 
           a_(a), lower_(lower), upper_(upper), num_levels_(num_levels) { 
+        MATX_LOG_TRACE("{} constructor: num_levels={}", str(), num_levels);
         for (int r = 0; r < Rank(); r++) {
           out_dims_[r] = a_.Size(r);
         }
diff --git a/include/matx/operators/if.h b/include/matx/operators/if.h
index 44554ff3..df86ab00 100644
--- a/include/matx/operators/if.h
+++ b/include/matx/operators/if.h
@@ -70,6 +70,7 @@ namespace matx
        */
       __MATX_INLINE__ IFOP(const T1 &cond, const T2 &op) : cond_(cond), op_(op)
       {
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
         static_assert((!is_tensor_view_v<T2>),
             "Only operator emmitters are allowed in IF. Tensor views are "
             "not allowed");
diff --git a/include/matx/operators/ifelse.h b/include/matx/operators/ifelse.h
index e6ce941d..8a46f48e 100644
--- a/include/matx/operators/ifelse.h
+++ b/include/matx/operators/ifelse.h
@@ -77,6 +77,7 @@ namespace matx
       __MATX_INLINE__ IFELSEOp(const C1 &cond, const T1 &op1, const T2 &op2) :
                               cond_(cond), op1_(op1), op2_(op2)
     {
+      MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
       static_assert((!is_tensor_view_v<T1> && !is_tensor_view_v<T2>),
           "Only operator emmitters are allowed in IFELSE. Tensor views "
           "are not allowed");
diff --git a/include/matx/operators/index.h b/include/matx/operators/index.h
index a6b07a22..68004af8 100644
--- a/include/matx/operators/index.h
+++ b/include/matx/operators/index.h
@@ -53,7 +53,9 @@ namespace matx
         using value_type = index_t;
 
         __MATX_INLINE__ std::string str() const { return "index()"; } 
-        __MATX_INLINE__ IndexOp(int dim) : dim_(dim){};  
+        __MATX_INLINE__ IndexOp(int dim) : dim_(dim){
+          MATX_LOG_TRACE("{} constructor: dim={}", str(), dim);
+        };  
 
         template <typename CapType, typename... Is>
         __MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ auto operator()(Is... indices) const 
diff --git a/include/matx/operators/interleaved.h b/include/matx/operators/interleaved.h
index 94ad969a..2546b6f3 100644
--- a/include/matx/operators/interleaved.h
+++ b/include/matx/operators/interleaved.h
@@ -55,6 +55,7 @@ namespace matx
         __MATX_INLINE__ std::string str() const { return "interleaved(" + op_.str() + ")"; }
 
         __MATX_INLINE__ ComplexInterleavedOp(const T1 &op) : op_(op) {
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
           static_assert(!is_complex_v<extract_value_type_t<T1>>, "Complex interleaved op only works on scalar input types");
           static_assert(Rank() > 0);
         };
diff --git a/include/matx/operators/interp.h b/include/matx/operators/interp.h
index df4b8102..0ca38f05 100644
--- a/include/matx/operators/interp.h
+++ b/include/matx/operators/interp.h
@@ -397,6 +397,7 @@ namespace matx {
         xq_(xq),
         method_(method)
       {
+        MATX_LOG_TRACE("{} constructor: method={}", str(), static_cast<int>(method));
         if (x_.Size(x_.Rank() - 1) != v_.Size(v_.Rank() - 1)) {
           MATX_THROW(matxInvalidSize, "interp1: sample points and values must have the same size in the last dimension");
         }
diff --git a/include/matx/operators/inverse.h b/include/matx/operators/inverse.h
index 813af389..d09f1ad0 100644
--- a/include/matx/operators/inverse.h
+++ b/include/matx/operators/inverse.h
@@ -57,7 +57,9 @@ namespace detail {
       using inv_xform_op = bool;
 
       __MATX_INLINE__ std::string str() const { return "inv()"; }
-      __MATX_INLINE__ InvOp(const OpA &a) : a_(a) {};
+      __MATX_INLINE__ InvOp(const OpA &a) : a_(a) {
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
+      };
 
 
       template <typename CapType, typename... Is>
diff --git a/include/matx/operators/isclose.h b/include/matx/operators/isclose.h
index 60e79fb4..66923962 100644
--- a/include/matx/operators/isclose.h
+++ b/include/matx/operators/isclose.h
@@ -55,6 +55,7 @@ namespace matx
         __MATX_INLINE__ IsCloseOp(const Op1 &op1, const Op2 &op2, double rtol, double atol) :
           op1_(op1), op2_(op2), rtol_(static_cast<inner_type>(rtol)), atol_(static_cast<inner_type>(atol))
         {
+          MATX_LOG_TRACE("{} constructor: rtol={}, atol={}", str(), rtol, atol);
           static_assert(Op1::Rank() == Op2::Rank(), "Operator ranks must match in isclose()");
           MATX_ASSERT_COMPATIBLE_OP_SIZES(op1);
           MATX_ASSERT_COMPATIBLE_OP_SIZES(op2);
diff --git a/include/matx/operators/kronecker.h b/include/matx/operators/kronecker.h
index 7389c6a2..ad17e926 100644
--- a/include/matx/operators/kronecker.h
+++ b/include/matx/operators/kronecker.h
@@ -61,6 +61,7 @@ namespace matx
 
         __MATX_INLINE__ KronOp(const T1 &op1, const T2 &op2) : op1_(op1), op2_(op2)
         {
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
           static_assert(RankGTE(Rank(), 2), "Kronecker product must be used on tensors with rank 2 or higher");
         }        
 
diff --git a/include/matx/operators/legendre.h b/include/matx/operators/legendre.h
index 2f8bb611..d07371f2 100644
--- a/include/matx/operators/legendre.h
+++ b/include/matx/operators/legendre.h
@@ -95,6 +95,7 @@ namespace matx
         __MATX_INLINE__ std::string str() const { return "legendre(" + get_type_str(n_) + "," + get_type_str(m_) + "," + get_type_str(in_) + ")"; }
 
         __MATX_INLINE__ LegendreOp(const T1 &n, const T2 &m, const T3 &in, cuda::std::array<int,2> axis) : n_(n), m_(m), in_(in), axis_(axis) {
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
           static_assert(get_rank<T1>() <= 1, "legendre op:  n must be a scalar, rank 0 or 1 operator");
           static_assert(get_rank<T2>() <= 1, "legendre op:  m must be a scalar, rank 0 or 1 operator");
         }
diff --git a/include/matx/operators/lu.h b/include/matx/operators/lu.h
index 5a5fee11..54ce65fc 100644
--- a/include/matx/operators/lu.h
+++ b/include/matx/operators/lu.h
@@ -55,7 +55,9 @@ namespace detail {
       using lu_xform_op = bool;
 
       __MATX_INLINE__ std::string str() const { return "lu()"; }
-      __MATX_INLINE__ LUOp(const OpA &a) : a_(a) { };
+      __MATX_INLINE__ LUOp(const OpA &a) : a_(a) {
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
+      };
 
       // This should never be called
       template <typename... Is>
diff --git a/include/matx/operators/matmul.h b/include/matx/operators/matmul.h
index e72ec621..a341840e 100644
--- a/include/matx/operators/matmul.h
+++ b/include/matx/operators/matmul.h
@@ -73,6 +73,7 @@ namespace matx
 
         __MATX_INLINE__ MatMulOp(const OpA &a, const OpB &b, float alpha, float beta, PermDims perm) : 
               a_(a), b_(b), alpha_(alpha), beta_(beta), perm_(perm) {
+          MATX_LOG_TRACE("{} constructor: alpha={}, beta={}", str(), alpha, beta);
           if constexpr (!std::is_same_v<PermDims, no_permute_t>) {
             for (int r = 0; r < Rank(); r++) {
               if (r == Rank() - 2) {
diff --git a/include/matx/operators/matvec.h b/include/matx/operators/matvec.h
index 288ee8c1..692e0e23 100644
--- a/include/matx/operators/matvec.h
+++ b/include/matx/operators/matvec.h
@@ -67,7 +67,7 @@ namespace matx
 
         __MATX_INLINE__ MatVecOp(const OpA &A, const OpB &B, float alpha, float beta) : 
               a_(A), b_(B), alpha_(alpha), beta_(beta) {
-          
+          MATX_LOG_TRACE("{} constructor: alpha={}, beta={}", str(), alpha, beta);
           for (int r = 0; r < Rank(); r++) {
             out_dims_[r] = a_.Size(r);
           }
diff --git a/include/matx/operators/max.h b/include/matx/operators/max.h
index 3a8fcdb4..415c0145 100644
--- a/include/matx/operators/max.h
+++ b/include/matx/operators/max.h
@@ -60,6 +60,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "max(" + get_type_str(a_) + ")"; }
       __MATX_INLINE__ MaxOp(const OpA &a) : a_(a) { 
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
         for (int r = 0; r < ORank; r++) {
           out_dims_[r] = a_.Size(r);
         }        
diff --git a/include/matx/operators/mean.h b/include/matx/operators/mean.h
index eb1ae6ff..b928d423 100644
--- a/include/matx/operators/mean.h
+++ b/include/matx/operators/mean.h
@@ -60,6 +60,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "mean(" + get_type_str(a_) + ")"; }
       __MATX_INLINE__ MeanOp(const OpA &a) : a_(a) { 
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
         for (int r = 0; r < ORank; r++) {
           out_dims_[r] = a_.Size(r);
         }            
diff --git a/include/matx/operators/median.h b/include/matx/operators/median.h
index e48709b6..5e7cc24d 100644
--- a/include/matx/operators/median.h
+++ b/include/matx/operators/median.h
@@ -60,6 +60,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "median(" + get_type_str(a_) + ")"; }
       __MATX_INLINE__ MedianOp(const OpA &a) : a_(a) { 
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
         for (int r = 0; r < ORank; r++) {
           out_dims_[r] = a_.Size(r);
         }            
diff --git a/include/matx/operators/min.h b/include/matx/operators/min.h
index ae6b4855..b4fe1fe1 100644
--- a/include/matx/operators/min.h
+++ b/include/matx/operators/min.h
@@ -60,6 +60,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "min(" + get_type_str(a_) + ")"; }
       __MATX_INLINE__ MinOp(const OpA &a) : a_(a) { 
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
         for (int r = 0; r < ORank; r++) {
           out_dims_[r] = a_.Size(r);
         }        
diff --git a/include/matx/operators/norm.h b/include/matx/operators/norm.h
index c85e0b00..351e2d80 100644
--- a/include/matx/operators/norm.h
+++ b/include/matx/operators/norm.h
@@ -70,6 +70,7 @@ namespace matx
       }
 
       __MATX_INLINE__ NormOp(const OpA &op, NormOrder order) : a_(op), order_(order) {
+        MATX_LOG_TRACE("{} constructor: order={}", str(), static_cast<int>(order));
         if constexpr (std::is_same_v<NormType, detail::NormTypeVector>) {
           MATX_ASSERT_STR(order == NormOrder::NONE || order == NormOrder::L1 || order == NormOrder::L2, matxInvalidParameter,
             "Invalid norm order used for vector mode");
diff --git a/include/matx/operators/normalize.h b/include/matx/operators/normalize.h
index d4de7055..4db08cf7 100644
--- a/include/matx/operators/normalize.h
+++ b/include/matx/operators/normalize.h
@@ -72,10 +72,12 @@ namespace matx
         using self_type = NormalizeOp<OpA, DIM>;
 
         __MATX_INLINE__ NormalizeOp(const OpA &op, const NORMALIZE_RANGE method): op_(op), normalize_method(method) {
+          MATX_LOG_TRACE("{} constructor: method={}", str(), static_cast<int>(method));
           InitNormalize();
         }
 
         __MATX_INLINE__ NormalizeOp(const OpA &op, const NORMALIZE_RANGE method, const float p): op_(op), normalize_method(method),  p_(p){
+          MATX_LOG_TRACE("{} constructor: method={}, p={}", str(), static_cast<int>(method), p);
           MATX_ASSERT_STR(normalize_method == NORMALIZE_RANGE::NORM, matxInvalidParameter, "p value can be specified for only p-norm");
           InitNormalize();
         }
diff --git a/include/matx/operators/outer.h b/include/matx/operators/outer.h
index 958adc4e..e8b4a141 100644
--- a/include/matx/operators/outer.h
+++ b/include/matx/operators/outer.h
@@ -67,7 +67,7 @@ namespace matx
 
         __MATX_INLINE__ OuterOp(const OpA &A, const OpB &B, float alpha, float beta) : 
               a_(A), b_(B), alpha_(alpha), beta_(beta) {
-
+          MATX_LOG_TRACE("{} constructor: alpha={}, beta={}", str(), alpha, beta);
           out_dims_[RANK - 1] = b_.Size(OpB::Rank() - 1);
           out_dims_[RANK - 2] = a_.Size(OpA::Rank() - 1);
           if constexpr (remove_cvref_t<OpA>::Rank() >= remove_cvref_t<OpB>::Rank()) {
diff --git a/include/matx/operators/overlap.h b/include/matx/operators/overlap.h
index 0ca7df56..5dc40510 100644
--- a/include/matx/operators/overlap.h
+++ b/include/matx/operators/overlap.h
@@ -65,7 +65,7 @@ namespace matx
         __MATX_INLINE__ std::string str() const { return "overlap(" + op_.str() + ")"; }
         __MATX_INLINE__ OverlapOp(const T &op, const cuda::std::array<shape_type, DIM> &windows,
                                       const cuda::std::array<shape_type, DIM> &strides) : op_(op) {
-
+          MATX_LOG_TRACE("{} constructor: dim={}, rank={}", str(), DIM, Rank());
           // This only works for 1D tensors going to 2D at the moment. Generalize to
           // higher dims later
           index_t window_size = windows[0];
diff --git a/include/matx/operators/pad.h b/include/matx/operators/pad.h
index df768775..caf9ebca 100644
--- a/include/matx/operators/pad.h
+++ b/include/matx/operators/pad.h
@@ -81,6 +81,7 @@ namespace matx
       __MATX_INLINE__ PadOp(const T& op, int axis, const PadSizeType& pad_sizes, const value_type& pad_value, PadMode mode = MATX_PAD_MODE_CONSTANT) 
         : op_(op), axis_(axis), pad_value_(pad_value), mode_(mode)
       {
+        MATX_LOG_TRACE("{} constructor: axis={}, mode={}", str(), axis, static_cast<int>(mode));
         static_assert(RANK > 0, "Cannot pad rank-0 tensors");
         MATX_ASSERT_STR(axis >= 0 && axis < RANK, matxInvalidDim, "pad axis must be >= 0 and less than the rank of the operator");
         MATX_ASSERT_STR(pad_sizes.size() == 2, matxInvalidParameter, "pad_sizes must contain exactly 2 elements [before, after]");
diff --git a/include/matx/operators/percentile.h b/include/matx/operators/percentile.h
index c8b3ecc5..353b9108 100644
--- a/include/matx/operators/percentile.h
+++ b/include/matx/operators/percentile.h
@@ -61,6 +61,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "percentile(" + get_type_str(a_) + ")"; }
       __MATX_INLINE__ PercentileOp(const OpA &a, unsigned char q, PercentileMethod method) : a_(a), q_(q), method_(method) {
+        MATX_LOG_TRACE("{} constructor: q={}, method={}", str(), static_cast<int>(q), static_cast<int>(method));
         for (int r = 0; r < ORank; r++) {
           out_dims_[r]    = (r == ORank - 1) ? 1 : a_.Size(r);
         }
diff --git a/include/matx/operators/permute.h b/include/matx/operators/permute.h
index f7c151cc..3e408c47 100644
--- a/include/matx/operators/permute.h
+++ b/include/matx/operators/permute.h
@@ -74,6 +74,7 @@ namespace matx
 
             dims_[i] = dims[i];
           }
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
         }
 
         template <typename CapType, typename Op, typename Dims, typename... Is>
diff --git a/include/matx/operators/pinv.h b/include/matx/operators/pinv.h
index a109a30e..dde0cf12 100644
--- a/include/matx/operators/pinv.h
+++ b/include/matx/operators/pinv.h
@@ -59,6 +59,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "pinv()"; }
       __MATX_INLINE__ PinvOp(const OpA &a, float rcond) : a_(a), rcond_(rcond) {
+        MATX_LOG_TRACE("{} constructor: rcond={}", str(), rcond);
         for (int r = 0; r < Rank(); r++) {
           if (r >= Rank() - 2) {
             out_dims_[r] = (r == Rank() - 1) ? a_.Size(Rank() - 2) : a_.Size(Rank() - 1);
diff --git a/include/matx/operators/planar.h b/include/matx/operators/planar.h
index 78998a87..7dccefbf 100644
--- a/include/matx/operators/planar.h
+++ b/include/matx/operators/planar.h
@@ -54,6 +54,7 @@ namespace matx
         __MATX_INLINE__ ComplexPlanarOp(const T1 &op) : op_(op) {
           static_assert(is_complex_v<extract_value_type_t<T1>>, "Complex planar op only works on complex types");
           static_assert(Rank() > 0);
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
         };
 
         template <typename CapType, typename... Is>
diff --git a/include/matx/operators/polyval.h b/include/matx/operators/polyval.h
index b028f248..34760ce5 100644
--- a/include/matx/operators/polyval.h
+++ b/include/matx/operators/polyval.h
@@ -56,6 +56,7 @@ namespace matx
 
         __MATX_INLINE__ std::string str() const { return "polyval()"; }
         __MATX_INLINE__ PolyvalOp(const Op &op, const Coeffs &coeffs) : op_(op), coeffs_(coeffs) {
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
           MATX_STATIC_ASSERT_STR(Coeffs::Rank() == 1, matxInvalidDim, "Coefficient must be rank 1");
           MATX_STATIC_ASSERT_STR(Op::Rank() == 1, matxInvalidDim, "Input operator must be rank 1");
         };
diff --git a/include/matx/operators/prod.h b/include/matx/operators/prod.h
index 7a9f41c7..6ac580de 100644
--- a/include/matx/operators/prod.h
+++ b/include/matx/operators/prod.h
@@ -60,6 +60,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "prod(" + get_type_str(a_) + ")"; }
       __MATX_INLINE__ ProdOp(const OpA &a) : a_(a) { 
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
         for (int r = 0; r < ORank; r++) {
           out_dims_[r] = a_.Size(r);
         }                    
diff --git a/include/matx/operators/pwelch.h b/include/matx/operators/pwelch.h
index 44bbea81..5ec291bd 100644
--- a/include/matx/operators/pwelch.h
+++ b/include/matx/operators/pwelch.h
@@ -72,6 +72,7 @@ namespace matx
               output_scale_mode_(output_scale_mode),
               fs_(fs)
         {
+          MATX_LOG_TRACE("{} constructor: nperseg={}, noverlap={}, nfft={}, fs={}", str(), nperseg, noverlap, nfft, fs);
           MATX_STATIC_ASSERT_STR(OpX::Rank() == 1, matxInvalidDim, "pwelch:  Only input rank of 1 is supported presently");
           for (int r = 0; r < OpX::Rank(); r++) {
             out_dims_[r] = nfft_;
diff --git a/include/matx/operators/qr.h b/include/matx/operators/qr.h
index 69e1b61f..ad3d4490 100644
--- a/include/matx/operators/qr.h
+++ b/include/matx/operators/qr.h
@@ -56,7 +56,9 @@ namespace detail {
       using qr_xform_op = bool;
 
       __MATX_INLINE__ std::string str() const { return "qr(" + get_type_str(a_) + ")"; }
-      __MATX_INLINE__ QROp(const OpA &a) : a_(a) { };
+      __MATX_INLINE__ QROp(const OpA &a) : a_(a) {
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
+      };
 
       // This should never be called
       template <typename... Is>
@@ -131,7 +133,9 @@ namespace detail {
       using qr_solver_xform_op = bool;
 
       __MATX_INLINE__ std::string str() const { return "qr_solver()"; }
-      __MATX_INLINE__ SolverQROp(const OpA &a) : a_(a) { }    
+      __MATX_INLINE__ SolverQROp(const OpA &a) : a_(a) {
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
+      }
 
       // This should never be called
       template <typename... Is>
diff --git a/include/matx/operators/r2c.h b/include/matx/operators/r2c.h
index 5d4cbc69..8cd185f1 100644
--- a/include/matx/operators/r2c.h
+++ b/include/matx/operators/r2c.h
@@ -53,6 +53,7 @@ namespace matx
         __MATX_INLINE__ std::string str() const { return "r2c(" + op_.str() + ")"; }
 
         __MATX_INLINE__ R2COp(const T1 &op, index_t orig) : op_(op), orig_size_(orig) {
+          MATX_LOG_TRACE("{} constructor: rank={}, orig_size={}", str(), Rank(), orig);
           static_assert(Rank() >= 1, "R2COp must have a rank 1 operator or higher");
         };
 
diff --git a/include/matx/operators/reduce.h b/include/matx/operators/reduce.h
index 0b5e32db..1fd835f3 100644
--- a/include/matx/operators/reduce.h
+++ b/include/matx/operators/reduce.h
@@ -66,6 +66,7 @@ namespace matx
 
         __MATX_INLINE__ ReduceOp(const OpA &A, PermDims perm, ReductionOp rop, bool init) : 
               a_(A), perm_(perm), reduction_op_(rop), init_(init) {
+          MATX_LOG_TRACE("{} constructor: rop={}, init={}", str(), static_cast<int>(rop), init);
           for (int r = 0; r < ORank; r++) {
             out_dims_[r] = a_.Size(r);
           }
diff --git a/include/matx/operators/remap.h b/include/matx/operators/remap.h
index 814fadbf..ea7f4dfb 100644
--- a/include/matx/operators/remap.h
+++ b/include/matx/operators/remap.h
@@ -63,7 +63,9 @@ namespace matx
 
         __MATX_INLINE__ std::string str() const { return "remap(" + op_.str() + ")"; }
 
-	      __MATX_INLINE__ RemapOp(const T &op, IdxType idx) : op_(op), idx_(idx) {};
+	      __MATX_INLINE__ RemapOp(const T &op, IdxType idx) : op_(op), idx_(idx) {
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
+        };
 
         // Only supports one element per thread
         template <typename CapType, typename Op, typename Idx, typename... Is>
diff --git a/include/matx/operators/repmat.h b/include/matx/operators/repmat.h
index af6d1598..0ea2db44 100644
--- a/include/matx/operators/repmat.h
+++ b/include/matx/operators/repmat.h
@@ -67,6 +67,7 @@ namespace matx
           {
             reps_[dim] = reps;
           }
+          MATX_LOG_TRACE("{} constructor: rank={}, reps={}", str(), DIM, reps);
         }
 
         __MATX_INLINE__ RepMatOp(const T1 &op, const cuda::std::array<index_t, DIM> reps) : op_(op)
@@ -75,6 +76,7 @@ namespace matx
           {
             reps_[dim] = reps[dim];
           }
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), DIM);
         }
 
         __MATX_INLINE__ RepMatOp(const T1 &op, const index_t *reps) : op_(op)
diff --git a/include/matx/operators/resample_poly.h b/include/matx/operators/resample_poly.h
index 12e5aa56..752a57d6 100644
--- a/include/matx/operators/resample_poly.h
+++ b/include/matx/operators/resample_poly.h
@@ -66,7 +66,8 @@ namespace detail {
       __MATX_INLINE__ std::string str() const { return "resample_poly(" + get_type_str(a_) + "," + get_type_str(f_) + ")";}
       __MATX_INLINE__ ResamplePolyOp(const OpA &a, const FilterType &f, index_t up, index_t down) : 
           a_(a), f_(f), up_(up), down_(down) 
-      { 
+      {
+        MATX_LOG_TRACE("{} constructor: up={}, down={}", str(), up, down); 
         const index_t up_len = a_.Size(OpA::Rank() - 1) * up_;
         const index_t b_len = up_len / down_ + ((up_len % down_) ? 1 : 0);
 
diff --git a/include/matx/operators/reshape.h b/include/matx/operators/reshape.h
index f470bf2a..073ae45a 100644
--- a/include/matx/operators/reshape.h
+++ b/include/matx/operators/reshape.h
@@ -77,6 +77,7 @@ namespace matx
           }
 
           MATX_ASSERT_STR(size == TotalSize(op_), matxInvalidSize, "ReshapeOp: TotalSize of reshape must match");
+          MATX_LOG_TRACE("{} constructor: rank={}, total_size={}", str(), Rank(), size);
         };
 
         template <typename CapType, typename Op, typename... Is>
diff --git a/include/matx/operators/reverse.h b/include/matx/operators/reverse.h
index 6d27a15e..1fc30e96 100644
--- a/include/matx/operators/reverse.h
+++ b/include/matx/operators/reverse.h
@@ -61,7 +61,9 @@ namespace matx
 
         __MATX_INLINE__ std::string str() const { return "reverse(" + op_.str() + ")"; }
 
-        __MATX_INLINE__ ReverseOp(const T1 &op) : op_(op){};
+        __MATX_INLINE__ ReverseOp(const T1 &op) : op_(op){ // stores the wrapped operator; emits a construction trace
+          MATX_LOG_TRACE("{} constructor: dim={}", str(), DIM); // DIM is labelled "dim" as in the shift/overlap traces; NOTE(review): DIM appears to be the reversed axis, not Rank() -- confirm
+        };
 
         template <typename CapType, typename Op, typename... Is>
         static __MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) get_impl(Op&& op, Is... indices)
diff --git a/include/matx/operators/select.h b/include/matx/operators/select.h
index 9e7f834a..a26e883c 100644
--- a/include/matx/operators/select.h
+++ b/include/matx/operators/select.h
@@ -57,7 +57,9 @@ namespace matx
 
         __MATX_INLINE__ std::string str() const { return "select(" + op_.str() + ")"; }
 
-        __MATX_INLINE__ SelectOp(const T &op, IdxType idx) : op_(op), idx_(idx) {};  
+        __MATX_INLINE__ SelectOp(const T &op, IdxType idx) : op_(op), idx_(idx) {
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
+        };  
 
         template <typename CapType, typename Op, typename Idx, typename... Is>
         static __MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) get_impl(Op&& op, const Idx &idx, index_t i)
diff --git a/include/matx/operators/self.h b/include/matx/operators/self.h
index 3bf6ed28..6d2cbddf 100644
--- a/include/matx/operators/self.h
+++ b/include/matx/operators/self.h
@@ -57,7 +57,9 @@ namespace matx
         
         __MATX_INLINE__ std::string str() const { return "self(" + op_.str() + ")"; }
         
-	      __MATX_INLINE__ SelfOp(const T1 &op) : op_(op) {}
+	      __MATX_INLINE__ SelfOp(const T1 &op) : op_(op) {
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
+        }
 
         template <typename CapType, typename... Is>
         __MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ decltype(auto) operator()(Is... indices) const 
diff --git a/include/matx/operators/set.h b/include/matx/operators/set.h
index 084917ad..9d096ed4 100644
--- a/include/matx/operators/set.h
+++ b/include/matx/operators/set.h
@@ -98,6 +98,7 @@ class set : public BaseOp<set<T, Op>> {
    */
   inline set(T &out, const Op &op) : out_(out), op_(op)
   {
+    MATX_LOG_TRACE("{} constructor: rank={}", str(), T::Rank());
     static_assert(is_matx_op_lvalue<T>() == true, "Invalid operator on LHS of set/operator=");
     static_assert(!is_matx_transform_op<T>(), "Cannot use transform operator on LHS of assignment");
 
diff --git a/include/matx/operators/shift.h b/include/matx/operators/shift.h
index b19ccf44..5132d643 100644
--- a/include/matx/operators/shift.h
+++ b/include/matx/operators/shift.h
@@ -63,6 +63,7 @@ namespace matx
 
         __MATX_INLINE__ ShiftOp(const T1 &op, T2 shift) : op_(op), shift_(shift)
         {
+          MATX_LOG_TRACE("{} constructor: dim={}, rank={}", str(), DIM, Rank());
           static_assert(DIM < Rank(), "Dimension to shift must be less than rank of tensor");
 
           MATX_LOOP_UNROLL
diff --git a/include/matx/operators/sign.h b/include/matx/operators/sign.h
index 6372d13b..7371811b 100644
--- a/include/matx/operators/sign.h
+++ b/include/matx/operators/sign.h
@@ -56,7 +56,9 @@ namespace matx
         value_type zval_;
 
         __MATX_INLINE__ std::string str() const { return "sign(" + get_type_str(op_) + ")"; }
-        __MATX_INLINE__ SignOp(const T &op, value_type zval) : op_(op), zval_(zval) {};
+        __MATX_INLINE__ SignOp(const T &op, value_type zval) : op_(op), zval_(zval) {
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
+        };
 
         template <typename CapType, typename... Is>
         __MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ auto operator()(Is... indices) const 
diff --git a/include/matx/operators/slice.h b/include/matx/operators/slice.h
index 6a62ac56..598f0d9d 100644
--- a/include/matx/operators/slice.h
+++ b/include/matx/operators/slice.h
@@ -108,6 +108,7 @@ namespace matx
             }
           }
           MATX_ASSERT_STR(d==Rank(), matxInvalidDim, "SliceOp: Number of dimensions without matxDropDim must equal new rank.");
+          MATX_LOG_TRACE("{} constructor: input_rank={}, output_rank={}", str(), T::Rank(), DIM);
         };
 
         template <typename CapType, typename Op, typename... Is>
diff --git a/include/matx/operators/softmax.h b/include/matx/operators/softmax.h
index eacff1b1..a239b786 100644
--- a/include/matx/operators/softmax.h
+++ b/include/matx/operators/softmax.h
@@ -63,6 +63,7 @@ namespace matx
 
         __MATX_INLINE__ SoftmaxOp(const OpA &A, PermDims perm) : 
               a_(A), perm_(perm) {
+          MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
           for (int r = 0; r < OpA::Rank(); r++) {
             out_dims_[r] = a_.Size(r);
           }          
diff --git a/include/matx/operators/solve.h b/include/matx/operators/solve.h
index 0bc0abe1..0466c7cd 100644
--- a/include/matx/operators/solve.h
+++ b/include/matx/operators/solve.h
@@ -62,6 +62,7 @@ class SolveOp : public BaseOp<SolveOp<OpA, OpB>> {
   using value_type = typename OpA::value_type;
 
   __MATX_INLINE__ SolveOp(const OpA &a, const OpB &b) : a_(a), b_(b) {
+    MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
     for (int r = 0, rank = Rank(); r < rank; r++) {
       out_dims_[r] = b_.Size(r);
     }
diff --git a/include/matx/operators/sort.h b/include/matx/operators/sort.h
index c76f2cd6..e80545f0 100644
--- a/include/matx/operators/sort.h
+++ b/include/matx/operators/sort.h
@@ -66,6 +66,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "sort()"; }
       __MATX_INLINE__ SortOp(const OpA &a, SortDirection_t dir) : a_(a), dir_(dir) { 
+        MATX_LOG_TRACE("{} constructor: rank={}, dir={}", str(), Rank(), static_cast<int>(dir));
         for (int r = 0; r < Rank(); r++) {
           out_dims_[r] = a_.Size(r);
         }
diff --git a/include/matx/operators/sparse2dense.h b/include/matx/operators/sparse2dense.h
index 6f3e3dc3..c4715d45 100644
--- a/include/matx/operators/sparse2dense.h
+++ b/include/matx/operators/sparse2dense.h
@@ -58,6 +58,7 @@ class Sparse2DenseOp : public BaseOp<Sparse2DenseOp<OpA>> {
   using value_type = typename OpA::value_type;
 
   __MATX_INLINE__ Sparse2DenseOp(const OpA &a) : a_(a) {
+    MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
     for (int r = 0; r < Rank(); r++) {
       out_dims_[r] = a_.Size(r);
     }
diff --git a/include/matx/operators/sparse2sparse.h b/include/matx/operators/sparse2sparse.h
index 1f489556..a05286fb 100644
--- a/include/matx/operators/sparse2sparse.h
+++ b/include/matx/operators/sparse2sparse.h
@@ -51,7 +51,9 @@ class Sparse2SparseOp : public BaseOp<Sparse2SparseOp<OpA>> {
   using tosparse_xform_op = bool;
   using value_type = typename OpA::value_type;
 
-  __MATX_INLINE__ Sparse2SparseOp(const OpA &a) : a_(a) {}
+  __MATX_INLINE__ Sparse2SparseOp(const OpA &a) : a_(a) {
+    MATX_LOG_TRACE("{} constructor: rank={}", str(), OpA::Rank());  // str() is defined just below in this class; rank is taken from the input operator type OpA
+  }
 
   __MATX_INLINE__ std::string str() const {
     return "sparse2sparse(" + get_type_str(a_) + ")";
diff --git a/include/matx/operators/sph2cart.h b/include/matx/operators/sph2cart.h
index 2b320df0..3222d8c8 100644
--- a/include/matx/operators/sph2cart.h
+++ b/include/matx/operators/sph2cart.h
@@ -59,6 +59,7 @@ namespace matx
 
         __MATX_INLINE__ Sph2CartOp(const T1 &theta, const T2 &phi, const T3 &r) : theta_(theta), phi_(phi), r_(r)
       {
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
         MATX_ASSERT_COMPATIBLE_OP_SIZES(theta);
         MATX_ASSERT_COMPATIBLE_OP_SIZES(phi);
         MATX_ASSERT_COMPATIBLE_OP_SIZES(r);
diff --git a/include/matx/operators/stack.h b/include/matx/operators/stack.h
index 05d23a35..90b392fe 100644
--- a/include/matx/operators/stack.h
+++ b/include/matx/operators/stack.h
@@ -75,6 +75,7 @@ namespace matx
 
       __MATX_INLINE__ StackOp(int axis, const Ts&... ts) : ops_(ts...), axis_(axis)
       {
+        MATX_LOG_TRACE("{} constructor: axis={}, num_tensors={}", str(), axis, sizeof...(Ts));
         static_assert(sizeof...(Ts) > 1, "Must have more than one tensor to stack");
         static_assert((... && (RANK == Ts::Rank())), "stacked ops must have the same rank");
 
diff --git a/include/matx/operators/stdd.h b/include/matx/operators/stdd.h
index 5cf7887a..6d93aef6 100644
--- a/include/matx/operators/stdd.h
+++ b/include/matx/operators/stdd.h
@@ -61,6 +61,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "stdd(" + get_type_str(a_) + ")"; }
       __MATX_INLINE__ StddOp(const OpA &a, int ddof) : a_(a),  ddof_(ddof) { 
+        MATX_LOG_TRACE("{} constructor: rank={}, ddof={}", str(), Rank(), ddof);
         for (int r = 0; r < ORank; r++) {
           out_dims_[r] = a_.Size(r);
         }
diff --git a/include/matx/operators/sum.h b/include/matx/operators/sum.h
index 79673e07..6b0f7448 100644
--- a/include/matx/operators/sum.h
+++ b/include/matx/operators/sum.h
@@ -63,6 +63,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "sum(" + get_type_str(a_) + ")"; }
       __MATX_INLINE__ SumOp(const OpA &a) : a_(a) { 
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
         for (int r = 0; r < ORank; r++) {
           out_dims_[r] = a_.Size(r);
         }
diff --git a/include/matx/operators/svd.h b/include/matx/operators/svd.h
index 6d23201c..3edc0b46 100644
--- a/include/matx/operators/svd.h
+++ b/include/matx/operators/svd.h
@@ -58,7 +58,9 @@ namespace detail {
       using svd_xform_op = bool;
 
       __MATX_INLINE__ std::string str() const { return "svd(" + get_type_str(a_) + ")"; }
-      __MATX_INLINE__ SVDOp(const OpA &a, const SVDMode jobz, const SVDHostAlgo algo) : a_(a), jobz_(jobz), algo_(algo) { };
+      __MATX_INLINE__ SVDOp(const OpA &a, const SVDMode jobz, const SVDHostAlgo algo) : a_(a), jobz_(jobz), algo_(algo) {
+        MATX_LOG_TRACE("{} constructor: jobz={}, algo={}", str(), static_cast<int>(jobz), static_cast<int>(algo));  // jobz and algo are passed to the log as their int values
+      }
 
       // This should never be called
       template <typename... Is>
@@ -167,8 +169,9 @@ namespace detail {
       using svd_xform_op = bool;
 
       __MATX_INLINE__ std::string str() const { return "svdpi(" + get_type_str(a_) + ")"; }
-      __MATX_INLINE__ SVDPIOp(const OpA &a, const OpX &x, int iterations, index_t k) : a_(a), x_(x), iterations_(iterations), k_(k) 
-      { }
+      __MATX_INLINE__ SVDPIOp(const OpA &a, const OpX &x, int iterations, index_t k) : a_(a), x_(x), iterations_(iterations), k_(k) {
+        MATX_LOG_TRACE("{} constructor: iterations={}, k={}", str(), iterations, k);  // hands str() plus the iterations and k arguments to the trace-log macro
+      }
 
       // This should never be called
       template <typename... Is>
diff --git a/include/matx/operators/toeplitz.h b/include/matx/operators/toeplitz.h
index 319ca789..daf49e15 100644
--- a/include/matx/operators/toeplitz.h
+++ b/include/matx/operators/toeplitz.h
@@ -76,6 +76,7 @@ namespace matx
 
         __MATX_INLINE__ ToeplitzOp(const T1 &op1, const T2 &op2) : op1_(op1), op2_(op2)
       {
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
         if constexpr (is_matx_op<T1>()) {
           static_assert(T1::Rank() == 1, "toeplitz() operator input rank must be 1");
         }
diff --git a/include/matx/operators/trace.h b/include/matx/operators/trace.h
index 89571ada..417038ff 100644
--- a/include/matx/operators/trace.h
+++ b/include/matx/operators/trace.h
@@ -58,7 +58,9 @@ namespace detail {
       using trace_xform_op = bool;
 
       __MATX_INLINE__ std::string str() const { return "trace()"; }
-      __MATX_INLINE__ TraceOp(const OpA &a) : a_(a) {}
+      __MATX_INLINE__ TraceOp(const OpA &a) : a_(a) {
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());  // str() here is the fixed label "trace()" (defined on the line above the constructor)
+      }
 
       __MATX_HOST__ __MATX_INLINE__ auto Data() const noexcept { return ptr; }
 
diff --git a/include/matx/operators/transpose.h b/include/matx/operators/transpose.h
index 1f09ccbb..d73b100f 100644
--- a/include/matx/operators/transpose.h
+++ b/include/matx/operators/transpose.h
@@ -68,7 +68,8 @@ namespace detail {
           else {
             out_dims_[r] = a_.Size(r);
           }
-        }        
+        }
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
       }
 
       template <typename CapType, typename... Is>
diff --git a/include/matx/operators/unary_operators.h b/include/matx/operators/unary_operators.h
index 29a43b04..100a8196 100644
--- a/include/matx/operators/unary_operators.h
+++ b/include/matx/operators/unary_operators.h
@@ -73,6 +73,7 @@ namespace matx
     }
 
     __MATX_INLINE__ matxUnaryOp(const I1 &in1, const Op &op) : in1_(in1), op_(op) {
+      MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());
       if constexpr (Rank() > 0) {
         for (int32_t i = 0; i < Rank(); i++) {
           size_[i] = get_size(in1_, i);
diff --git a/include/matx/operators/unique.h b/include/matx/operators/unique.h
index 6c88f0fc..9296190d 100644
--- a/include/matx/operators/unique.h
+++ b/include/matx/operators/unique.h
@@ -55,7 +55,9 @@ namespace detail {
       using unique_xform_op = bool;
 
       __MATX_INLINE__ std::string str() const { return "unique()"; }
-      __MATX_INLINE__ UniqueOp(const OpA &a) : a_(a) { };
+      __MATX_INLINE__ UniqueOp(const OpA &a) : a_(a) {
+        MATX_LOG_TRACE("{} constructor: rank={}", str(), Rank());  // str() here is the fixed label "unique()" (defined on the line above the constructor)
+      }
 
       // This should never be called
       template <typename... Is>
diff --git a/include/matx/operators/updownsample.h b/include/matx/operators/updownsample.h
index 3d078c19..71a2428d 100644
--- a/include/matx/operators/updownsample.h
+++ b/include/matx/operators/updownsample.h
@@ -66,6 +66,7 @@ namespace matx
         __MATX_INLINE__ std::string str() const { return "upsample(" + op_.str() + ")"; }
 
         __MATX_INLINE__ UpsampleOp(const T &op, int32_t dim, index_t n) : op_(op), dim_(dim), n_(n) {
+          MATX_LOG_TRACE("{} constructor: dim={}, n={}, rank={}", str(), dim, n, Rank());
         };
 
         template <typename CapType, typename... Is>
diff --git a/include/matx/operators/var.h b/include/matx/operators/var.h
index fee055f1..d4810b66 100644
--- a/include/matx/operators/var.h
+++ b/include/matx/operators/var.h
@@ -61,6 +61,7 @@ namespace detail {
 
       __MATX_INLINE__ std::string str() const { return "var(" + get_type_str(a_) + ")"; }
       __MATX_INLINE__ VarOp(const OpA &a, int ddof) : a_(a), ddof_(ddof) { 
+        MATX_LOG_TRACE("{} constructor: rank={}, ddof={}", str(), Rank(), ddof);
         for (int r = 0; r < ORank; r++) {
           out_dims_[r] = a_.Size(r);
         }        
diff --git a/include/matx/operators/zipvec.h b/include/matx/operators/zipvec.h
index 53cea3e4..9f31ac6c 100644
--- a/include/matx/operators/zipvec.h
+++ b/include/matx/operators/zipvec.h
@@ -74,6 +74,7 @@ namespace matx
 
       __MATX_INLINE__ ZipVecOp(const Ts&... ts) : ops_(ts...)
       {
+        MATX_LOG_TRACE("{} constructor: num_ops={}, rank={}", str(), sizeof...(Ts), Rank());
         static_assert(sizeof...(Ts) > 0 && sizeof...(Ts) <= 4, "Must have between 1 and 4 operators for zipvec");
         static_assert((... && (RANK == Ts::Rank())), "zipped ops must have the same rank");
         // All ops must have the same scalar value type; that is enforced by AggregateToVecType
diff --git a/include/matx/transforms/chol/chol_cuda.h b/include/matx/transforms/chol/chol_cuda.h
index c610c717..3bf28ae0 100644
--- a/include/matx/transforms/chol/chol_cuda.h
+++ b/include/matx/transforms/chol/chol_cuda.h
@@ -303,8 +303,10 @@ void chol_impl(OutputTensor &&out, const ATensor &a,
   auto params = detail::matxDnCholCUDAPlan_t<OutputTensor, decltype(tmp_out)>::GetCholParams(tmp_out, uplo_cusolver, exec);
 
   using cache_val_type = detail::matxDnCholCUDAPlan_t<OutputTensor, decltype(tmp_out)>;
+  auto cache_id = detail::GetCacheIdFromType<detail::chol_cuda_cache_t>();
+  MATX_LOG_DEBUG("Cholesky transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::chol_cuda_cache_t>(
-    detail::GetCacheIdFromType<detail::chol_cuda_cache_t>(),
+    cache_id,
     params,
     [&]() {
       return std::make_shared<cache_val_type>(tmp_out, exec, uplo_cusolver);
diff --git a/include/matx/transforms/convert/dense2sparse_cusparse.h b/include/matx/transforms/convert/dense2sparse_cusparse.h
index 230536df..fc658d75 100644
--- a/include/matx/transforms/convert/dense2sparse_cusparse.h
+++ b/include/matx/transforms/convert/dense2sparse_cusparse.h
@@ -310,8 +310,10 @@ void dense2sparse_impl(OutputTensorType &o, const InputTensorType &A,
 
   // Lookup and cache.
   using cache_val_type = detail::Dense2SparseHandle_t<otype, atype>;
+  auto cache_id = detail::GetCacheIdFromType<detail::dense2sparse_cache_t>();
+  MATX_LOG_DEBUG("Dense2Sparse transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::dense2sparse_cache_t>(
-      detail::GetCacheIdFromType<detail::dense2sparse_cache_t>(), params,
+      cache_id, params,
       [&]() { return std::make_shared<cache_val_type>(o, a, stream); },
       [&](std::shared_ptr<cache_val_type> cache_type) {
         cache_type->Exec(o, a);
diff --git a/include/matx/transforms/convert/sparse2dense_cusparse.h b/include/matx/transforms/convert/sparse2dense_cusparse.h
index 183935ca..30a13679 100644
--- a/include/matx/transforms/convert/sparse2dense_cusparse.h
+++ b/include/matx/transforms/convert/sparse2dense_cusparse.h
@@ -257,8 +257,10 @@ void sparse2dense_impl(OutputTensorType &O, const InputTensorType &a,
 
   // Lookup and cache.
   using cache_val_type = detail::Sparse2DenseHandle_t<otype, atype>;
+  auto cache_id = detail::GetCacheIdFromType<detail::sparse2dense_cache_t>();
+  MATX_LOG_DEBUG("Sparse2Dense transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::sparse2dense_cache_t>(
-      detail::GetCacheIdFromType<detail::sparse2dense_cache_t>(), params,
+      cache_id, params,
       [&]() { return std::make_shared<cache_val_type>(o, a, stream); },
       [&](std::shared_ptr<cache_val_type> cache_type) {
         cache_type->Exec(o, a);
diff --git a/include/matx/transforms/convert/sparse2sparse_cusparse.h b/include/matx/transforms/convert/sparse2sparse_cusparse.h
index 1fffeebc..8266f916 100644
--- a/include/matx/transforms/convert/sparse2sparse_cusparse.h
+++ b/include/matx/transforms/convert/sparse2sparse_cusparse.h
@@ -226,8 +226,10 @@ void sparse2sparse_impl(OutputTensorType &o, const InputTensorType &a,
 
   // Lookup and cache.
   using cache_val_type = detail::Sparse2SparseHandle_t<otype, atype>;
+  auto cache_id = detail::GetCacheIdFromType<detail::sparse2sparse_cache_t>();
+  MATX_LOG_DEBUG("Sparse2Sparse transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::sparse2sparse_cache_t>(
-      detail::GetCacheIdFromType<detail::sparse2sparse_cache_t>(), params,
+      cache_id, params,
       [&]() { return std::make_shared<cache_val_type>(o, a, stream); },
       [&](std::shared_ptr<cache_val_type> cache_type) {
         cache_type->Exec(o, a);
diff --git a/include/matx/transforms/cov.h b/include/matx/transforms/cov.h
index 1afcd51d..10161942 100644
--- a/include/matx/transforms/cov.h
+++ b/include/matx/transforms/cov.h
@@ -240,8 +240,10 @@ void cov_impl(TensorTypeC &c, const TensorTypeA &a,
   auto params = detail::matxCovHandle_t<TensorTypeC, TensorTypeA>::GetCovParams(c, a, stream);
 
   using cache_val_type = detail::matxCovHandle_t<TensorTypeC, TensorTypeA>;
+  auto cache_id = detail::GetCacheIdFromType<detail::cov_cache_t>();
+  MATX_LOG_DEBUG("Covariance transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::cov_cache_t>(
-    detail::GetCacheIdFromType<detail::cov_cache_t>(),
+    cache_id,
     params,
     [&]() {
       return std::make_shared<cache_val_type>(c, a);
diff --git a/include/matx/transforms/cub.h b/include/matx/transforms/cub.h
index 4ee596a8..447ec9df 100644
--- a/include/matx/transforms/cub.h
+++ b/include/matx/transforms/cub.h
@@ -1502,8 +1502,10 @@ void sort_impl_inner(OutputTensor &a_out, const InputOperator &a,
                             detail::CUB_OP_RADIX_SORT>::GetCubParams(a_out, a, stream);
 
   using cache_val_type = detail::matxCubPlan_t<OutputTensor, InputOperator, detail::CUB_OP_RADIX_SORT, detail::SortParams_t>;
+  auto cache_id = detail::GetCacheIdFromType<detail::cub_cache_t>();
+  MATX_LOG_DEBUG("CUB radix sort transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::cub_cache_t>(
-      detail::GetCacheIdFromType<detail::cub_cache_t>(),
+      cache_id,
       params,
       [&]() {
         return std::make_shared<cache_val_type>(a_out, a, p, stream);
@@ -1701,8 +1703,10 @@ void cub_reduce(OutputTensor &a_out, const InputOperator &a, typename InputOpera
                             detail::CUB_OP_REDUCE,
                             param_type>::GetCubParams(a_out, a, stream);
   using cache_val_type = detail::matxCubPlan_t<OutputTensor, InputOperator, detail::CUB_OP_REDUCE, param_type>;
+  auto cache_id = detail::GetCacheIdFromType<detail::cub_cache_t>();
+  MATX_LOG_DEBUG("CUB reduce transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::cub_cache_t>(
-    detail::GetCacheIdFromType<detail::cub_cache_t>(),
+    cache_id,
     params,
     [&]() {
       return std::make_shared<cache_val_type>(a_out, a, reduce_params, stream);
@@ -1751,8 +1755,10 @@ void cub_sum(OutputTensor &a_out, const InputOperator &a,
                             detail::CUB_OP_REDUCE_SUM>::GetCubParams(a_out, a, stream);
 
   using cache_val_type = detail::matxCubPlan_t<OutputTensor, InputOperator, detail::CUB_OP_REDUCE_SUM, detail::EmptyParams_t>;
+  auto cache_id = detail::GetCacheIdFromType<detail::cub_cache_t>();
+  MATX_LOG_DEBUG("CUB reduce sum transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::cub_cache_t>(
-      detail::GetCacheIdFromType<detail::cub_cache_t>(),
+      cache_id,
       params,
       [&]() {
         return std::make_shared<cache_val_type>(a_out, a, detail::EmptyParams_t{}, stream);
@@ -1796,8 +1802,10 @@ void cub_min(OutputTensor &a_out, const InputOperator &a,
                             detail::CUB_OP_REDUCE_MIN>::GetCubParams(a_out, a, stream);
 
   using cache_val_type = detail::matxCubPlan_t<OutputTensor, InputOperator, detail::CUB_OP_REDUCE_MIN, detail::EmptyParams_t>;
+  auto cache_id = detail::GetCacheIdFromType<detail::cub_cache_t>();
+  MATX_LOG_DEBUG("CUB reduce min transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::cub_cache_t>(
-      detail::GetCacheIdFromType<detail::cub_cache_t>(),
+      cache_id,
       params,
       [&]() {
         return std::make_shared<cache_val_type>(a_out, a, detail::EmptyParams_t{}, stream);
@@ -1842,8 +1850,10 @@ void cub_max(OutputTensor &a_out, const InputOperator &a,
                             detail::CUB_OP_REDUCE_MAX>::GetCubParams(a_out, a, stream);
 
   using cache_val_type = detail::matxCubPlan_t<OutputTensor, InputOperator, detail::CUB_OP_REDUCE_MAX, detail::EmptyParams_t>;
+  auto cache_id = detail::GetCacheIdFromType<detail::cub_cache_t>();
+  MATX_LOG_DEBUG("CUB reduce max transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::cub_cache_t>(
-      detail::GetCacheIdFromType<detail::cub_cache_t>(),
+      cache_id,
       params,
       [&]() {
         return std::make_shared<cache_val_type>(a_out, a, detail::EmptyParams_t{}, stream);
@@ -1907,8 +1917,10 @@ void cub_argreduce(OutputTensor &a_out, TensorIndexType &aidx_out, const InputOp
   #ifndef MATX_DISABLE_CUB_CACHE
     auto params = cache_val_type::GetCubParams(a_out_supported, aidx_out_supported, a_supported, detail::CUB_OP_SINGLE_ARG_REDUCE, stream);
 
+    auto cache_id = detail::GetCacheIdFromType<detail::cub_cache_t>();
+    MATX_LOG_DEBUG("CUB single arg reduce transform: cache_id={}", cache_id);
     detail::GetCache().LookupAndExec<detail::cub_cache_t>(
-        detail::GetCacheIdFromType<detail::cub_cache_t>(),
+        cache_id,
         params,
         [&]() {
           return std::make_shared<cache_val_type>(a_out_supported, aidx_out_supported, a_supported, reduce_params, stream);
@@ -1976,8 +1988,10 @@ void cub_dualargreduce(OutputTensor &a1_out,
   #ifndef MATX_DISABLE_CUB_CACHE
     auto params = cache_val_type::GetCubParams(a1_out, aidx1_out, a2_out, aidx2_out, a, detail::CUB_OP_DUAL_ARG_REDUCE, stream);
 
+    auto cache_id = detail::GetCacheIdFromType<detail::cub_cache_t>();
+    MATX_LOG_DEBUG("CUB dual arg reduce transform: cache_id={}", cache_id);
     detail::GetCache().LookupAndExec<detail::cub_cache_t>(
-        detail::GetCacheIdFromType<detail::cub_cache_t>(),
+        cache_id,
         params,
         [&]() {
           return std::make_shared<cache_val_type>(a1_out, aidx1_out, a2_out, aidx2_out, a, reduce_params, stream);
@@ -2256,8 +2270,10 @@ void cumsum_impl(OutputTensor &a_out, const InputOperator &a,
       detail::matxCubPlan_t<OutputTensor, InputOperator, detail::CUB_OP_INC_SUM>::GetCubParams(a_out, a, stream);
 
   using cache_val_type = detail::matxCubPlan_t<OutputTensor, InputOperator, detail::CUB_OP_INC_SUM, detail::EmptyParams_t>;
+  auto cache_id = detail::GetCacheIdFromType<detail::cub_cache_t>();
+  MATX_LOG_DEBUG("CUB cumsum transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::cub_cache_t>(
-      detail::GetCacheIdFromType<detail::cub_cache_t>(),
+      cache_id,
       params,
       [&]() {
         return std::make_shared<cache_val_type>(a_out, a, detail::EmptyParams_t{}, stream);
@@ -2353,8 +2369,10 @@ void hist_impl(OutputTensor &a_out, const InputOperator &a,
                             detail::CUB_OP_HIST_EVEN>::GetCubParams(a_out, a, stream);
 
   using cache_val_type = detail::matxCubPlan_t<OutputTensor, InputOperator, detail::CUB_OP_HIST_EVEN, param_type>;
+  auto cache_id = detail::GetCacheIdFromType<detail::cub_cache_t>();
+  MATX_LOG_DEBUG("CUB histogram transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::cub_cache_t>(
-      detail::GetCacheIdFromType<detail::cub_cache_t>(),
+      cache_id,
       params,
       [&]() {
         return std::make_shared<cache_val_type>(a_out, a, hp, stream);
@@ -2494,8 +2512,10 @@ void find_impl(OutputTensor &a_out, CountTensor &num_found, const InputOperator
                             detail::CUB_OP_SELECT_VALS,
                             param_type>::GetCubParams(a_out, a, stream);
   using cache_val_type = detail::matxCubPlan_t<OutputTensor, InputOperator, detail::CUB_OP_SELECT_VALS, param_type>;
+  auto cache_id = detail::GetCacheIdFromType<detail::cub_cache_t>();
+  MATX_LOG_DEBUG("CUB find values transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::cub_cache_t>(
-      detail::GetCacheIdFromType<detail::cub_cache_t>(),
+      cache_id,
       params,
       [&]() {
         return std::make_shared<cache_val_type>(a_out, a, cparams, stream);
@@ -2613,8 +2633,10 @@ void find_idx_impl(OutputTensor &a_out, CountTensor &num_found, const InputOpera
                             detail::CUB_OP_SELECT_IDX,
                             param_type>::GetCubParams(a_out, a, stream);
   using cache_val_type = detail::matxCubPlan_t<OutputTensor, InputOperator, detail::CUB_OP_SELECT_IDX, param_type>;
+  auto cache_id = detail::GetCacheIdFromType<detail::cub_cache_t>();
+  MATX_LOG_DEBUG("CUB find indices transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::cub_cache_t>(
-      detail::GetCacheIdFromType<detail::cub_cache_t>(),
+      cache_id,
       params,
       [&]() {
         return std::make_shared<cache_val_type>(a_out, a, cparams, stream);
@@ -2732,8 +2754,10 @@ void unique_impl(OutputTensor &a_out, CountTensor &num_found, const InputOperato
                             detail::CUB_OP_UNIQUE,
                             param_type>::GetCubParams(a_out, a, stream);
   using cache_val_type = detail::matxCubPlan_t<OutputTensor, InputOperator, detail::CUB_OP_UNIQUE, param_type>;
+  auto cache_id = detail::GetCacheIdFromType<detail::cub_cache_t>();
+  MATX_LOG_DEBUG("CUB unique transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::cub_cache_t>(
-      detail::GetCacheIdFromType<detail::cub_cache_t>(),
+      cache_id,
       params,
       [&]() {
         return std::make_shared<cache_val_type>(a_out, a, cparams, stream);
diff --git a/include/matx/transforms/eig/eig_cuda.h b/include/matx/transforms/eig/eig_cuda.h
index 572cec94..aa19ee43 100644
--- a/include/matx/transforms/eig/eig_cuda.h
+++ b/include/matx/transforms/eig/eig_cuda.h
@@ -348,8 +348,10 @@ void eig_impl(OutputTensor &&out, WTensor &&w,
 
   // Get cache or new eigen plan if it doesn't exist
   using cache_val_type = detail::matxDnEigCUDAPlan_t<OutputTensor, decltype(w_new), decltype(a_new)>;
+  auto cache_id = detail::GetCacheIdFromType<detail::eig_cuda_cache_t>();
+  MATX_LOG_DEBUG("Eigenvalue transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::eig_cuda_cache_t>(
-    detail::GetCacheIdFromType<detail::eig_cuda_cache_t>(),
+    cache_id,
     params,
     [&]() {
       return std::make_shared<cache_val_type>(w_new, tv, exec, jobz_cusolver, uplo_cusolver);
diff --git a/include/matx/transforms/einsum.h b/include/matx/transforms/einsum.h
index dc3dc10a..8ac230ea 100644
--- a/include/matx/transforms/einsum.h
+++ b/include/matx/transforms/einsum.h
@@ -585,8 +585,10 @@ namespace cutensor {
 
     params.stream = stream;
 
+    auto cache_id = detail::GetCacheIdFromType<einsum_cache_t>();
+    MATX_LOG_DEBUG("Einsum transform: cache_id={}", cache_id);
     detail::GetCache().LookupAndExec<einsum_cache_t>(
-        detail::GetCacheIdFromType<einsum_cache_t>(),
+        cache_id,
         params,
         [&]() {
             return cuda::std::apply([&](auto&&... args) {
diff --git a/include/matx/transforms/fft/fft_cuda.h b/include/matx/transforms/fft/fft_cuda.h
index 218f184b..4b31dcc1 100644
--- a/include/matx/transforms/fft/fft_cuda.h
+++ b/include/matx/transforms/fft/fft_cuda.h
@@ -706,8 +706,10 @@ __MATX_INLINE__ void fft_impl(OutputTensor o, const InputTensor i,
   params.stream = stream;
 
   using cache_val_type = detail::matxCUDAFFTPlan1D_t<decltype(out), decltype(in)>;
+  auto cache_id = detail::GetCacheIdFromType<detail::fft_cuda_cache_t>();
+  MATX_LOG_DEBUG("FFT1D forward transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::fft_cuda_cache_t>(
-    detail::GetCacheIdFromType<detail::fft_cuda_cache_t>(),
+    cache_id,
     params,
     [&]() {
       return std::make_shared<cache_val_type>(out, in);
@@ -752,8 +754,10 @@ __MATX_INLINE__ void ifft_impl(OutputTensor o, const InputTensor i,
   params.stream = stream;
 
   using cache_val_type = detail::matxCUDAFFTPlan1D_t<decltype(out), decltype(in)>;
+  auto cache_id = detail::GetCacheIdFromType<detail::fft_cuda_cache_t>();
+  MATX_LOG_DEBUG("FFT1D inverse transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::fft_cuda_cache_t>(
-    detail::GetCacheIdFromType<detail::fft_cuda_cache_t>(),
+    cache_id,
     params,
     [&]() {
       return std::make_shared<cache_val_type>(out, in);
@@ -793,8 +797,10 @@ __MATX_INLINE__ void fft2_impl(OutputTensor o, const InputTensor i, FFTNorm norm
   params.stream = stream;
 
   using cache_val_type = detail::matxCUDAFFTPlan2D_t<decltype(out), decltype(in)>;
+  auto cache_id = detail::GetCacheIdFromType<detail::fft_cuda_cache_t>();
+  MATX_LOG_DEBUG("FFT2D forward transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::fft_cuda_cache_t>(
-    detail::GetCacheIdFromType<detail::fft_cuda_cache_t>(),
+    cache_id,
     params,
     [&]() {
       return std::make_shared<cache_val_type>(out, in);
@@ -835,8 +841,10 @@ __MATX_INLINE__ void ifft2_impl(OutputTensor o, const InputTensor i, FFTNorm nor
 
   // Get cache or new FFT plan if it doesn't exist
   using cache_val_type = detail::matxCUDAFFTPlan2D_t<decltype(out), decltype(in)>;
+  auto cache_id = detail::GetCacheIdFromType<detail::fft_cuda_cache_t>();
+  MATX_LOG_DEBUG("FFT2D inverse transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::fft_cuda_cache_t>(
-    detail::GetCacheIdFromType<detail::fft_cuda_cache_t>(),
+    cache_id,
     params,
     [&]() {
       return std::make_shared<cache_val_type>(out, in);
diff --git a/include/matx/transforms/filter.h b/include/matx/transforms/filter.h
index 51ecbc51..1e5c53d0 100644
--- a/include/matx/transforms/filter.h
+++ b/include/matx/transforms/filter.h
@@ -477,8 +477,10 @@ void filter_impl([[maybe_unused]] OutType &o, [[maybe_unused]] const InType &i,
   params.hash = rhash + nrhash;
 
   using cache_val_type = detail::matxFilter_t<NR, NNR, OutType, InType, FilterType>;
+  auto cache_id = detail::GetCacheIdFromType<detail::filter_cache_t>();
+  MATX_LOG_DEBUG("Filter transform: cache_id={}, NR={}, NNR={}", cache_id, NR, NNR);
   detail::GetCache().LookupAndExec<detail::filter_cache_t>(
-    detail::GetCacheIdFromType<detail::filter_cache_t>(),
+    cache_id,
     params,
     [&]() {
       return matxMakeFilter(o, i, h_rec, h_nonrec);
diff --git a/include/matx/transforms/inverse.h b/include/matx/transforms/inverse.h
index 368c2fe7..e2b9f68f 100644
--- a/include/matx/transforms/inverse.h
+++ b/include/matx/transforms/inverse.h
@@ -593,8 +593,10 @@ void inv_impl(TensorTypeAInv &a_inv, const TensorTypeA &a,
   auto params = detail::matxInversePlan_t<TensorTypeAInv, TensorTypeA, ALGO>::GetInverseParams(a_inv, a, stream);
 
   using cache_val_type = detail::matxInversePlan_t<TensorTypeAInv, TensorTypeA, ALGO>;
+  auto cache_id = detail::GetCacheIdFromType<detail::inv_cache_t>();
+  MATX_LOG_DEBUG("Inverse transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::inv_cache_t>(
-    detail::GetCacheIdFromType<detail::inv_cache_t>(),
+    cache_id,
     params,
     [&]() {
       return std::make_shared<cache_val_type>(a_inv, a, stream);
diff --git a/include/matx/transforms/lu/lu_cuda.h b/include/matx/transforms/lu/lu_cuda.h
index ca3c5fa6..229e8874 100644
--- a/include/matx/transforms/lu/lu_cuda.h
+++ b/include/matx/transforms/lu/lu_cuda.h
@@ -292,8 +292,10 @@ void lu_impl(OutputTensor &&out, PivotTensor &&piv,
 
   // Get cache or new LU plan if it doesn't exist
   using cache_val_type = detail::matxDnLUCUDAPlan_t<OutputTensor, decltype(piv_new), decltype(a_new)>;
+  auto cache_id = detail::GetCacheIdFromType<detail::lu_cuda_cache_t>();
+  MATX_LOG_DEBUG("LU transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::lu_cuda_cache_t>(
-    detail::GetCacheIdFromType<detail::lu_cuda_cache_t>(),
+    cache_id,
     params,
     [&]() {
       return std::make_shared<cache_val_type>(piv_new, tvt, exec);
diff --git a/include/matx/transforms/matmul/matmul_cuda.h b/include/matx/transforms/matmul/matmul_cuda.h
index 7024510a..71060058 100644
--- a/include/matx/transforms/matmul/matmul_cuda.h
+++ b/include/matx/transforms/matmul/matmul_cuda.h
@@ -1230,8 +1230,10 @@ void matmul_impl(TensorTypeC C, const TensorTypeA A,
     params.stream = stream;
 
     using cache_val_type = detail::MatMulCUDAHandle_t<ctype, atype, btype, PROV>;
+    auto cache_id = detail::GetCacheIdFromType<detail::gemm_cuda_cache_t>();
+    MATX_LOG_DEBUG("MatMul transform: cache_id={}", cache_id);
     detail::GetCache().LookupAndExec<detail::gemm_cuda_cache_t>(
-      detail::GetCacheIdFromType<detail::gemm_cuda_cache_t>(),
+      cache_id,
       params,
       [&]() {
         return std::make_shared<cache_val_type>(c, a, b);
diff --git a/include/matx/transforms/matmul/matmul_cusparse.h b/include/matx/transforms/matmul/matmul_cusparse.h
index 3ac0664c..83ca7824 100644
--- a/include/matx/transforms/matmul/matmul_cusparse.h
+++ b/include/matx/transforms/matmul/matmul_cusparse.h
@@ -320,8 +320,10 @@ void sparse_matmul_impl(TensorTypeC &C, const TensorTypeA &a,
 
   // Lookup and cache.
   using cache_val_type = detail::MatMulCUSPARSEHandle_t<ctype, atype, btype>;
+  auto cache_id = detail::GetCacheIdFromType<detail::gemm_cusparse_cache_t>();
+  MATX_LOG_DEBUG("MatMul CUSPARSE transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::gemm_cusparse_cache_t>(
-      detail::GetCacheIdFromType<detail::gemm_cusparse_cache_t>(), params,
+      cache_id, params,
       [&]() {
         return std::make_shared<cache_val_type>(c, a, b, stream, alpha, beta);
       },
diff --git a/include/matx/transforms/matmul/matvec_cusparse.h b/include/matx/transforms/matmul/matvec_cusparse.h
index d3f97979..32207106 100644
--- a/include/matx/transforms/matmul/matvec_cusparse.h
+++ b/include/matx/transforms/matmul/matvec_cusparse.h
@@ -343,8 +343,10 @@ void sparse_matvec_impl(TensorTypeC &C, const TensorTypeA &a,
 
     // Lookup and cache.
     using cache_val_type = detail::MatVecCUSPARSEHandle_t<ctype, atype, btype>;
+    auto cache_id = detail::GetCacheIdFromType<detail::spmv_cusparse_cache_t>();
+    MATX_LOG_DEBUG("MatVec CUSPARSE transform: cache_id={}", cache_id);
     detail::GetCache().LookupAndExec<detail::spmv_cusparse_cache_t>(
-        detail::GetCacheIdFromType<detail::spmv_cusparse_cache_t>(), params,
+        cache_id, params,
         [&]() {
           return std::make_shared<cache_val_type>(c, a, b, stream, alpha, beta);
         },
diff --git a/include/matx/transforms/qr/qr_cuda.h b/include/matx/transforms/qr/qr_cuda.h
index 5d677db4..73bf6686 100644
--- a/include/matx/transforms/qr/qr_cuda.h
+++ b/include/matx/transforms/qr/qr_cuda.h
@@ -475,8 +475,10 @@ void qr_solver_impl(OutTensor &&out, TauTensor &&tau,
 
   // Get cache or new QR plan if it doesn't exist
   using cache_val_type = detail::matxDnQRCUDAPlan_t<OutTensor, decltype(tau_new), decltype(a_new)>;
+  auto cache_id = detail::GetCacheIdFromType<detail::qr_cuda_cache_t>();
+  MATX_LOG_DEBUG("QR transform (full): cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::qr_cuda_cache_t>(
-    detail::GetCacheIdFromType<detail::qr_cuda_cache_t>(),
+    cache_id,
     params,
     [&]() {
       return std::make_shared<cache_val_type>(tau_new, tvt, exec);
@@ -848,8 +850,10 @@ void qr_econ_impl(OutTensor &&out, RTensor &&out_r,
 
   // Get cache or new QR plan if it doesn't exist
   using cache_val_type = detail::matxDnEconQRCUDAPlan_t<OutTensor, RTensor, decltype(tau_new), decltype(a_new)>;
+  auto cache_id = detail::GetCacheIdFromType<detail::qr_cuda_cache_t>();
+  MATX_LOG_DEBUG("QR transform (economic): cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::qr_cuda_cache_t>(
-    detail::GetCacheIdFromType<detail::qr_cuda_cache_t>(),
+    cache_id,
     params,
     [&]() {
       return std::make_shared<cache_val_type>(tau_new, tvt, exec);
diff --git a/include/matx/transforms/solve/solve_cudss.h b/include/matx/transforms/solve/solve_cudss.h
index 2aa32437..568003e3 100644
--- a/include/matx/transforms/solve/solve_cudss.h
+++ b/include/matx/transforms/solve/solve_cudss.h
@@ -286,8 +286,10 @@ void sparse_solve_impl(TensorTypeC &C, const TensorTypeA &a,
 
   // Lookup and cache.
   using cache_val_type = detail::SolveCUDSSHandle_t<ctype, atype, btype>;
+  auto cache_id = detail::GetCacheIdFromType<detail::gemm_cudss_cache_t>();
+  MATX_LOG_DEBUG("Solve CUDSS transform: cache_id={}", cache_id);
   detail::GetCache().LookupAndExec<detail::gemm_cudss_cache_t>(
-      detail::GetCacheIdFromType<detail::gemm_cudss_cache_t>(), params,
+      cache_id, params,
       [&]() { return std::make_shared<cache_val_type>(c, a, b, stream); },
       [&](std::shared_ptr<cache_val_type> cache_type) {
         cache_type->Exec(c, a, b);
diff --git a/include/matx/transforms/svd/svd_cuda.h b/include/matx/transforms/svd/svd_cuda.h
index ac664a37..4b602c39 100644
--- a/include/matx/transforms/svd/svd_cuda.h
+++ b/include/matx/transforms/svd/svd_cuda.h
@@ -1001,8 +1001,10 @@ void svd_impl(UTensor &&u, STensor &&s,
 
     // Get cache or new SVD plan if it doesn't exist
     using cache_val_type = detail::matxDnSVDCUDAPlan_t<decltype(u_in), decltype(s_new), decltype(vt_in), decltype(at_col_maj)>;
+    auto cache_id = detail::GetCacheIdFromType<detail::svd_cuda_cache_t>();
+    MATX_LOG_DEBUG("SVD transform (full): cache_id={}", cache_id);
     detail::GetCache().LookupAndExec<detail::svd_cuda_cache_t>(
-      detail::GetCacheIdFromType<detail::svd_cuda_cache_t>(),
+      cache_id,
       params,
       [&]() {
         return std::make_shared<cache_val_type>(u_in, s_new, vt_in, at_col_maj, method, exec, job_cusolver);
@@ -1039,8 +1041,10 @@ void svd_impl(UTensor &&u, STensor &&s,
 
     // Get cache or new SVD plan if it doesn't exist
     using cache_val_type = detail::matxDnSVDCUDAPlan_t<decltype(u_col_maj), decltype(s_new), decltype(vt_col_maj), decltype(tvt)>;
+    auto cache_id = detail::GetCacheIdFromType<detail::svd_cuda_cache_t>();
+    MATX_LOG_DEBUG("SVD transform (vectors): cache_id={}", cache_id);
     detail::GetCache().LookupAndExec<detail::svd_cuda_cache_t>(
-      detail::GetCacheIdFromType<detail::svd_cuda_cache_t>(),
+      cache_id,
       params,
       [&]() {
         return std::make_shared<cache_val_type>(u_col_maj, s_new, vt_col_maj, tvt, method, exec, job_cusolver);