From dc4200bb82af9314dc0cffc89e68f369e45deab2 Mon Sep 17 00:00:00 2001
From: Swetha B S <swetha@multicorewareinc.com>
Date: Mon, 25 Mar 2024 08:06:06 +0000
Subject: [PATCH] Add MFB to MIVisisonX

---
 amd_openvx_extensions/amd_rpp/CMakeLists.txt  |   1 +
 .../amd_rpp/include/internal_publishKernels.h |   2 +
 .../amd_rpp/include/kernels_rpp.h             |   3 +-
 .../amd_rpp/include/vx_ext_rpp.h              |  30 ++-
 .../source/internal_publishKernels.cpp        |   1 +
 .../amd_rpp/source/kernel_rpp.cpp             |  30 ++-
 .../amd_rpp/source/tensor/MelFilterBank.cpp   | 252 ++++++++++++++++++
 7 files changed, 309 insertions(+), 10 deletions(-)
 create mode 100644 amd_openvx_extensions/amd_rpp/source/tensor/MelFilterBank.cpp
diff --git a/amd_openvx_extensions/amd_rpp/CMakeLists.txt b/amd_openvx_extensions/amd_rpp/CMakeLists.txt
index d09bc0443..028865da6 100644
--- a/amd_openvx_extensions/amd_rpp/CMakeLists.txt
+++ b/amd_openvx_extensions/amd_rpp/CMakeLists.txt
@@ -159,6 +159,7 @@ list(APPEND SOURCES
         source/tensor/WarpAffine.cpp
         source/tensor/SequenceRearrange.cpp
         source/tensor/PreemphasisFilter.cpp
+        source/tensor/MelFilterBank.cpp
         source/kernel_rpp.cpp
         source/internal_publishKernels.cpp
     )
diff --git a/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h b/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h
index 7a028afac..0f4014142 100644
--- a/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h
+++ b/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h
@@ -157,6 +157,7 @@ vx_status WarpAffine_Register(vx_context);
 vx_status SequenceRearrange_Register(vx_context);
 vx_status PreemphasisFilter_Register(vx_context);
 vx_status Spectrogram_Register(vx_context);
+vx_status MelFilterBank_Register(vx_context);
 
 // kernel names
 #define VX_KERNEL_RPP_NOPBATCHPD_NAME                           "org.rpp.NopbatchPD"
@@ -283,5 +284,6 @@ vx_status Spectrogram_Register(vx_context);
 #define VX_KERNEL_RPP_SEQUENCEREARRANGE_NAME                    "org.rpp.SequenceRearrange"
 #define VX_KERNEL_RPP_PREEMPHASISFILTER_NAME                    "org.rpp.PreemphasisFilter"
 #define VX_KERNEL_RPP_SPECTROGRAM_NAME                          "org.rpp.Spectrogram"
+#define VX_KERNEL_RPP_MELFILTERBANK_NAME                        "org.rpp.MelFilterBank"
 
 #endif //_AMDVX_EXT__PUBLISH_KERNELS_H_
diff --git a/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h b/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h
index 8685fcb1b..2f6301486 100644
--- a/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h
+++ b/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h
@@ -149,7 +149,8 @@ extern "C"
         VX_KERNEL_RPP_VIGNETTE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x72,
         VX_KERNEL_RPP_WARPAFFINE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x73,
         VX_KERNEL_RPP_PREEMPHASISFILTER = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x74,
-        VX_KERNEL_RPP_SPECTROGRAM = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x75
+        VX_KERNEL_RPP_SPECTROGRAM = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x75,
+        VX_KERNEL_RPP_MELFILTERBANK = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x76,
     };
 
 #ifdef __cplusplus
diff --git a/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h b/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h
index 899772047..995556738 100644
--- a/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h
+++ b/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h
@@ -1881,13 +1881,13 @@ extern "C"
 	/*! \brief [Graph] Produces a spectrogram from a 1D signal.
 	* \ingroup group_amd_rpp
 	* \param [in] graph The handle to the graph.
-	* \param [in] pSrc The input tensor in <tt>\ref VX_TYPE_UINT8</tt> or <tt>\ref VX_TYPE_FLOAT32</tt> or <tt>\ref VX_TYPE_FLOAT16</tt> or <tt>\ref VX_TYPE_INT8</tt> format data.
-	* \param [in] pSrcLength The input tensor of batch size in <tt>unsigned int<tt> containing the roi values for the input in xywh/ltrb format.
-	* \param [out] pDst The output tensor (begin) in <tt>\ref VX_TYPE_UINT8</tt> or <tt>\ref VX_TYPE_FLOAT32</tt> or <tt>\ref VX_TYPE_FLOAT16</tt> or <tt>\ref VX_TYPE_INT8</tt> format data.
+	* \param [in] pSrc The input tensor in <tt>\ref VX_TYPE_FLOAT32</tt> format data.
+	* \param [in] pSrcRoi The input tensor of batch size in <tt>unsigned int<tt> containing the roi values for the input in xywh (w- samples, h - channels) format.
+	* \param [out] pDst The output tensor (begin) in <tt>\ref VX_TYPE_FLOAT32</tt> format data.
 	* \param [in] pDstDims The input tensor of batch size in <tt>unsigned int<tt> containing the roi values for the output tensor in xywh/ltrb format.
 	* \param [in] windowFn The input array in <tt>\ref VX_TYPE_FLOAT32</tt> format containing the samples of the window function that will be multiplied to each extracted window when calculating the STFT.
-	* \param [in] centerWindow The input scalar in <tt>\ref VX_TYPE_BOOL</tt> format containing indicates whether extracted windows should be padded so that the window function is centered at multiples of window_step.
-	* \param [in] reflectPadding The input scalar in <tt>\ref VX_TYPE_BOOL</tt> format containing indicates the padding policy when sampling outside the bounds of the signal.
+	* \param [in] centerWindow The input scalar in <tt>\ref VX_TYPE_BOOL</tt> format indicating whether extracted windows should be padded so that the window function is centered at multiples of window_step.
+	* \param [in] reflectPadding The input scalar in <tt>\ref VX_TYPE_BOOL</tt> format indicating the padding policy when sampling outside the bounds of the signal.
 	* \param [in] spectrogramLayout The input scalar in <tt>\ref VX_TYPE_INT32</tt> format containing the Output spectrogram layout.
 	* \param [in] power The input scalar in <tt>\ref VX_TYPE_INT32</tt> format containing the exponent of the magnitude of the spectrum.
 	* \param [in] nfft The input scalar in <tt>\ref VX_TYPE_INT32</tt> format containing the size of the FFT.
@@ -1895,9 +1895,27 @@ extern "C"
 	* \param [in] windowStep The input array in <tt>\ref VX_TYPE_INT32</tt> format containing the step between the STFT windows in number of samples.
 	* \return A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a successful creation should be checked using <tt>\ref vxGetStatus</tt>.
 	*/
-	SHARED_PUBLIC vx_node VX_API_CALL vxExtRppSpectrogram(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcLength, vx_tensor pDst, vx_tensor pDstDims, vx_array windowFn, vx_scalar centerWindow, vx_scalar reflectPadding, vx_scalar spectrogramLayout,
+	SHARED_PUBLIC vx_node VX_API_CALL vxExtRppSpectrogram(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_tensor pDstDims, vx_array windowFn, vx_scalar centerWindow, vx_scalar reflectPadding, vx_scalar spectrogramLayout,
 														  vx_scalar power, vx_scalar nfft, vx_scalar windowLength, vx_scalar windowStep);
 
+	/*! \brief [Graph] Produces a mel-spectrogram from spectrogram on applying a bank of triangular filters
+	* \ingroup group_amd_rpp
+	* \param [in] graph The handle to the graph.
+	* \param [in] pSrc The input tensor in <tt>\ref VX_TYPE_FLOAT32</tt> format data.
+	* \param [in] pSrcRoi The input tensor of batch size in <tt>unsigned int<tt> containing the roi values for the input in xywh (w- samples, h - channels) format.
+	* \param [out] pDst The output tensor (begin) in <tt>\ref VX_TYPE_FLOAT32</tt> format data.
+	* \param [in] pDstRoi The input tensor of batch size in <tt>unsigned int<tt> containing the roi values for the input in xywh (w- samples, h - channels) format.
+	* \param [in] freqHigh The input array in <tt>\ref VX_TYPE_FLOAT32</tt> format containing the maximum frequency.
+	* \param [in] freqLow The input scalar in <tt>\ref VX_TYPE_FLOAT32</tt> format containing the minimum frequency.
+	* \param [in] melFormula The input scalar in <tt>\ref VX_TYPE_INT32</tt> format indicates the formula used to convert frequencies from hertz to mel and vice versa.
+	* \param [in] nfilter The input scalar in <tt>\ref VX_TYPE_INT32</tt> format containing the number of mel filters.
+	* \param [in] normalize The input scalar in <tt>\ref VX_TYPE_BOOL</tt> format to determine weather to normalize the triangular filter weights by the width of the frequecny bands.
+	* \param [in] sampleRate The input scalar in <tt>\ref VX_TYPE_FLOAT32</tt> format containing the sampling rate of the audio data.
+	* \return A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a successful creation should be checked using <tt>\ref vxGetStatus</tt>.
+	*/
+	SHARED_PUBLIC vx_node VX_API_CALL vxExtRppMelFilterBank(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_tensor pDstRoi, vx_scalar freqHigh,
+															vx_scalar freqLow, vx_scalar melFormula, vx_scalar nfilter, vx_scalar normalize, vx_scalar sampleRate);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp b/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp
index 80fddd977..aef3edfe1 100644
--- a/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp
+++ b/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp
@@ -162,6 +162,7 @@ vx_status get_kernels_to_publish()
     STATUS_ERROR_CHECK(ADD_KERNEL(WarpAffine_Register));
     STATUS_ERROR_CHECK(ADD_KERNEL(PreemphasisFilter_Register));
     STATUS_ERROR_CHECK(ADD_KERNEL(Spectrogram_Register));
+    STATUS_ERROR_CHECK(ADD_KERNEL(MelFilterBank_Register));
 
     return status;
 }
diff --git a/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp b/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp
index afba949da..74edd86b4 100644
--- a/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp
+++ b/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp
@@ -2559,7 +2559,7 @@ VX_API_ENTRY vx_node VX_API_CALL vxExtRppPreemphasisFilter(vx_graph graph, vx_te
     return node;
 }
 
-VX_API_ENTRY vx_node VX_API_CALL vxExtRppSpectrogram(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcLength, vx_tensor pDst, vx_tensor pDstDims, vx_array windowFn, vx_scalar centerWindows, vx_scalar reflectPadding, vx_scalar spectrogramLayout,
+VX_API_ENTRY vx_node VX_API_CALL vxExtRppSpectrogram(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_tensor pDstRoi, vx_array windowFn, vx_scalar centerWindows, vx_scalar reflectPadding, vx_scalar spectrogramLayout,
                                                      vx_scalar power, vx_scalar nfft, vx_scalar windowLength, vx_scalar windowStep) {
     vx_node node = NULL;
     vx_context context = vxGetContext((vx_reference)graph);
@@ -2568,9 +2568,9 @@ VX_API_ENTRY vx_node VX_API_CALL vxExtRppSpectrogram(vx_graph graph, vx_tensor p
         vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devtype);
         vx_reference params[] = {
             (vx_reference)pSrc,
-            (vx_reference)pSrcLength,
+            (vx_reference)pSrcRoi,
             (vx_reference)pDst,
-            (vx_reference)pDstDims,
+            (vx_reference)pDstRoi,
             (vx_reference)windowFn,
             (vx_reference)centerWindows,
             (vx_reference)reflectPadding,
@@ -2585,6 +2585,30 @@ VX_API_ENTRY vx_node VX_API_CALL vxExtRppSpectrogram(vx_graph graph, vx_tensor p
     return node;
 }
 
+VX_API_ENTRY vx_node VX_API_CALL vxExtRppMelFilterBank(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_tensor pDstRoi, vx_scalar freqHigh, vx_scalar freqLow, vx_scalar melFormula,
+                                                       vx_scalar nfilter, vx_scalar normalize, vx_scalar sampleRate) {
+    vx_node node = NULL;
+    vx_context context = vxGetContext((vx_reference)graph);
+    if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
+        vx_uint32 devtype = getGraphAffinity(graph);
+        vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devtype);
+        vx_reference params[] = {
+            (vx_reference)pSrc,
+            (vx_reference)pSrcRoi,
+            (vx_reference)pDst,
+            (vx_reference)pDstRoi,
+            (vx_reference)freqHigh,
+            (vx_reference)freqLow,
+            (vx_reference)melFormula,
+            (vx_reference)nfilter,
+            (vx_reference)normalize,
+            (vx_reference)sampleRate,
+            (vx_reference)deviceType};
+        node = createNode(graph, VX_KERNEL_RPP_MELFILTERBANK, params, 11);
+    }
+    return node;
+}
+
 RpptDataType getRpptDataType(vx_enum vxDataType) {
     switch(vxDataType) {
         case vx_type_e::VX_TYPE_FLOAT32:
diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/MelFilterBank.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/MelFilterBank.cpp
new file mode 100644
index 000000000..872cd2442
--- /dev/null
+++ b/amd_openvx_extensions/amd_rpp/source/tensor/MelFilterBank.cpp
@@ -0,0 +1,252 @@
+/*
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "internal_publishKernels.h"
+
+struct MelFilterBankLocalData {
+    vxRppHandle *handle;
+    Rpp32u deviceType;
+    RppPtr_t pSrc;
+    RppPtr_t pDst;
+    Rpp32f freqHigh;
+    Rpp32f freqLow;
+    RpptMelScaleFormula melFormula;
+    Rpp32s nfilter;
+    bool normalize;
+    Rpp32f sampleRate;
+    RpptDescPtr pSrcDesc;
+    RpptDescPtr pDstDesc;
+    RpptImagePatch *pSrcDims;
+    size_t inputTensorDims[RPP_MAX_TENSOR_DIMS];
+    size_t outputTensorDims[RPP_MAX_TENSOR_DIMS];
+};
+
+void copy_src_dims_and_update_dst_roi(MelFilterBankLocalData *data, RpptROI *src_roi, RpptROI *dst_roi) {
+    for (unsigned i = 0; i < data->inputTensorDims[0]; i++) {
+        data->pSrcDims[i].width = src_roi[i].xywhROI.xy.x;
+        data->pSrcDims[i].height = src_roi[i].xywhROI.xy.y;
+        dst_roi[i].xywhROI.xy.x = src_roi[i].xywhROI.xy.x;
+        dst_roi[i].xywhROI.xy.y = data->nfilter;
+    }
+}
+
+static vx_status VX_CALLBACK refreshMelFilterBank(vx_node node, const vx_reference *parameters, vx_uint32 num, MelFilterBankLocalData *data) {
+    vx_status status = VX_SUCCESS;
+    void *roi_tensor_ptr_src, *roi_tensor_ptr_dst;
+    if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+        return VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc)));
+        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr_src, sizeof(&roi_tensor_ptr_src)));
+        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst)));
+        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[3], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr_dst, sizeof(&roi_tensor_ptr_dst)));
+#endif
+    } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
+        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr_src, sizeof(roi_tensor_ptr_src)));
+        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
+        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[3], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr_dst, sizeof(roi_tensor_ptr_dst)));
+    }
+    RpptROI *src_roi = reinterpret_cast<RpptROI *>(roi_tensor_ptr_src);
+    RpptROI *dst_roi = reinterpret_cast<RpptROI *>(roi_tensor_ptr_dst);
+    copy_src_dims_and_update_dst_roi(data, src_roi, dst_roi);
+    return status;
+}
+
+static vx_status VX_CALLBACK validateMelFilterBank(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) {
+    vx_status status = VX_SUCCESS;
+    vx_enum scalar_type;
+    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+    if (scalar_type != VX_TYPE_FLOAT32)
+        return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #4 type=%d (must be size)\n", scalar_type);
+    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+    if (scalar_type != VX_TYPE_FLOAT32)
+        return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type);
+    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+    if (scalar_type != VX_TYPE_UINT32)
+        return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type);
+    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+    if (scalar_type != VX_TYPE_INT32)
+        return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be size)\n", scalar_type);
+    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+    if (scalar_type != VX_TYPE_BOOL)
+        return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #8 type=%d (must be size)\n", scalar_type);
+    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+    if (scalar_type != VX_TYPE_FLOAT32)
+        return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #9 type=%d (must be size)\n", scalar_type);
+
+    // Check for input parameters
+    size_t num_tensor_dims;
+    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+    if (num_tensor_dims < 3) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: MelFilterBank: tensor: #0 dimensions=%lu (must be greater than or equal to 3)\n", num_tensor_dims);
+
+    // Check for output parameters
+    vx_uint8 tensor_fixed_point_position;
+    size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
+    vx_enum tensor_datatype;
+    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+    if (num_tensor_dims < 3) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: MelFilterBank: tensor: #2 dimensions=%lu (must be greater than or equal to 3)\n", num_tensor_dims);
+
+    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_datatype, sizeof(tensor_datatype)));
+    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_datatype, sizeof(tensor_datatype)));
+    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+    return status;
+}
+
+static vx_status VX_CALLBACK processMelFilterBank(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+    RppStatus rpp_status = RPP_SUCCESS;
+    vx_status return_status = VX_SUCCESS;
+    MelFilterBankLocalData *data = NULL;
+    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+    refreshMelFilterBank(node, parameters, num, data);
+    if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL
+        return_status = VX_ERROR_NOT_IMPLEMENTED;
+#elif ENABLE_HIP
+        return_status = VX_ERROR_NOT_IMPLEMENTED;
+#endif
+    } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+        rpp_status = rppt_mel_filter_bank_host(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pSrcDims, data->freqHigh, data->freqLow,
+                                               data->melFormula, data->nfilter, data->sampleRate, data->normalize, data->handle->rppHandle);
+        return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+    }
+    return return_status;
+}
+
+static vx_status VX_CALLBACK initializeMelFilterBank(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+    MelFilterBankLocalData *data = new MelFilterBankLocalData;
+    memset(data, 0, sizeof(MelFilterBankLocalData));
+
+    vx_enum input_tensor_datatype, output_tensor_datatype;
+    int mel_formula;
+    STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->freqHigh));
+    STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->freqLow));
+    STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &mel_formula));
+    STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->nfilter));
+    STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[8], &data->normalize));
+    STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->sampleRate));
+    STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[10], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+    data->melFormula = (mel_formula == 0) ? RpptMelScaleFormula::SLANEY : RpptMelScaleFormula::HTK;
+
+    // Querying for input tensor
+    data->pSrcDesc = new RpptDesc;
+    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims)));
+    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims));
+    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_datatype, sizeof(input_tensor_datatype)));
+    data->pSrcDesc->dataType = getRpptDataType(input_tensor_datatype);
+    data->pSrcDesc->offsetInBytes = 0;
+    fillAudioDescriptionPtrFromDims(data->pSrcDesc, data->inputTensorDims);
+
+    // Querying for output tensor
+    data->pDstDesc = new RpptDesc;
+    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims)));
+    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->outputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims));
+    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &output_tensor_datatype, sizeof(output_tensor_datatype)));
+    data->pDstDesc->dataType = getRpptDataType(output_tensor_datatype);
+    data->pDstDesc->offsetInBytes = 0;
+    fillAudioDescriptionPtrFromDims(data->pDstDesc, data->outputTensorDims);
+
+    data->pSrcDims = new RpptImagePatch[data->pSrcDesc->n];
+    refreshMelFilterBank(node, parameters, num, data);
+    STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType));
+    STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+    return VX_SUCCESS;
+}
+
+static vx_status VX_CALLBACK uninitializeMelFilterBank(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+    MelFilterBankLocalData *data;
+    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+    STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType));
+    delete (data->pSrcDims);
+    delete (data->pSrcDesc);
+    delete (data->pDstDesc);
+    delete (data);
+    return VX_SUCCESS;
+}
+
+//! \brief The kernel target support callback.
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+                                                  vx_bool use_opencl_1_2,               // [input]  false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+                                                  vx_uint32 &supported_target_affinity  // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+) {
+    vx_context context = vxGetContext((vx_reference)graph);
+    AgoTargetAffinityInfo affinity;
+    vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+    if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+        supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+    else
+        supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+
+    return VX_SUCCESS;
+}
+
+vx_status MelFilterBank_Register(vx_context context) {
+    vx_status status = VX_SUCCESS;
+    // Add kernel to the context with callbacks
+    vx_kernel kernel = vxAddUserKernel(context, "org.rpp.MelFilterBank",
+                                       VX_KERNEL_RPP_MELFILTERBANK,
+                                       processMelFilterBank,
+                                       11,
+                                       validateMelFilterBank,
+                                       initializeMelFilterBank,
+                                       uninitializeMelFilterBank);
+    ERROR_CHECK_OBJECT(kernel);
+    AgoTargetAffinityInfo affinity;
+    vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+#if ENABLE_HIP
+    vx_bool enableBufferAccess = vx_true_e;
+    if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+        STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+#else
+    vx_bool enableBufferAccess = vx_false_e;
+#endif
+    amd_kernel_query_target_support_f query_target_support_f = query_target_support;
+
+    if (kernel) {
+        STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
+    }
+    if (status != VX_SUCCESS) {
+    exit:
+        vxRemoveKernel(kernel);
+        return VX_FAILURE;
+    }
+
+    return status;
+}