ROCm · kiritigowda · May 8, 2024 · Aug 3, 2023 · Aug 3, 2023 · Aug 5, 2023
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -27,7 +27,7 @@ set(CMAKE_CXX_COMPILER clang++)
 set(CMAKE_CXX_STANDARD 17)
 
 # RPP Version
-set(VERSION "1.6.0")
+set(VERSION "1.7.0")
 
 # Set Project Version and Language
 project(rpp VERSION ${VERSION} LANGUAGES CXX)
@@ -300,6 +300,7 @@ message("-- ${White}${PROJECT_NAME} -- Link Libraries: ${LINK_LIBRARY_LIST}${Col
 target_link_libraries(${PROJECT_NAME} ${LINK_LIBRARY_LIST})
 set_target_properties(${PROJECT_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
 set_target_properties(${PROJECT_NAME} PROPERTIES LINKER_LANGUAGE CXX)
+target_link_libraries(${PROJECT_NAME} ${PROJECT_SOURCE_DIR}/libs/third_party/ffts/libffts.a) # prebuilt FFTS static archive checked in under libs/third_party; plain (keyword-less) signature kept to match the target_link_libraries call above
 set_target_properties(${PROJECT_NAME} PROPERTIES
                       VERSION ${PROJECT_VERSION}
                       SOVERSION ${PROJECT_VERSION_MAJOR})

diff --git a/include/rppdefs.h b/include/rppdefs.h
@@ -361,10 +361,13 @@ typedef enum
  */
 typedef enum
 {
-    NCHW,
-    NHWC,
-    NCDHW,
-    NDHWC
+    NCHW,   // BatchSize-Channels-Height-Width
+    NHWC,   // BatchSize-Height-Width-Channels
+    NCDHW,  // BatchSize-Channels-Depth-Height-Width
+    NDHWC,  // BatchSize-Depth-Height-Width-Channels
+    NHW,    // BatchSize-Height-Width
+    NFT,    // BatchSize-Frequency-Time -> Frequency Major used for Spectrogram / MelfilterBank
+    NTF     // BatchSize-Time-Frequency -> Time Major used for Spectrogram / MelfilterBank
 } RpptLayout;
 
 /*! \brief RPPT Tensor 2D ROI type enum
@@ -417,6 +420,15 @@ typedef enum
     REFLECT
 } RpptAudioBorderType;
 
+/*! \brief RPPT Mel Scale Formula enum
+ * \ingroup group_rppdefs
+ */
+typedef enum
+{
+    SLANEY = 0,  // Follows the behavior of Slaney's MATLAB Auditory Modelling Work
+    HTK,         // Follows O'Shaughnessy's book formula, consistent with the Hidden Markov Toolkit (HTK): m = 2595 * log10(1 + f / 700)
+} RpptMelScaleFormula;
+
 /*! \brief RPPT Tensor 2D ROI LTRB struct
  * \ingroup group_rppdefs
  */

diff --git a/include/rppt_tensor_audio_augmentations.h b/include/rppt_tensor_audio_augmentations.h
@@ -110,6 +110,47 @@ RppStatus rppt_pre_emphasis_filter_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr,
 */
 RppStatus rppt_down_mixing_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32s *srcDimsTensor, bool normalizeWeights, rppHandle_t rppHandle);
 
+/*! \brief Produces a spectrogram from a 1D audio buffer on HOST backend
+ * \details Spectrogram for 1D audio buffer
+ * \param[in] srcPtr source tensor in HOST memory
+ * \param[in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
+ * \param[out] dstPtr destination tensor in HOST memory
+ * \param[in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32, layout - NFT / NTF)
+ * \param[in] srcLengthTensor source audio buffer length (1D tensor in HOST memory, of size batchSize)
+ * \param[in] centerWindows indicates whether extracted windows should be padded so that the window function is centered at multiples of windowStep
+ * \param[in] reflectPadding indicates the padding policy when sampling outside the bounds of the signal
+ * \param[in] windowFunction samples of the window function that each extracted window is multiplied by when calculating the Short Time Fourier Transform (STFT)
+ * \param[in] nfft size of the FFT
+ * \param[in] power exponent of the magnitude of the spectrum
+ * \param[in] windowLength window size in number of samples
+ * \param[in] windowStep step between the STFT windows in number of samples
+ * \param[in] rppHandle RPP HOST handle created with <tt>\ref rppCreateWithBatchSize()</tt>
+ * \return A <tt> \ref RppStatus</tt> enumeration.
+ * \retval RPP_SUCCESS Successful completion.
+ * \retval RPP_ERROR* Unsuccessful completion.
+ */
+RppStatus rppt_spectrogram_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32s *srcLengthTensor, bool centerWindows, bool reflectPadding, Rpp32f *windowFunction, Rpp32s nfft, Rpp32s power, Rpp32s windowLength, Rpp32s windowStep, rppHandle_t rppHandle);
+
+/*! \brief Mel filter bank augmentation on HOST backend
+ * \details Mel filter bank augmentation for audio data
+ * \param[in] srcPtr source tensor in HOST memory
+ * \param[in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32, layout - NFT / NTF)
+ * \param[out] dstPtr destination tensor in HOST memory
+ * \param[in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32, layout - NFT / NTF)
+ * \param[in] srcDims source audio buffer length and number of channels (1D tensor in HOST memory, of size batchSize * 2)
+ * \param[in] maxFreq maximum frequency; if not provided, maxFreq = sampleRate / 2
+ * \param[in] minFreq minimum frequency
+ * \param[in] melFormula formula used to convert frequencies from hertz to mel and from mel to hertz (SLANEY / HTK)
+ * \param[in] numFilter number of mel filters
+ * \param[in] sampleRate sampling rate of the audio
+ * \param[in] normalize boolean that determines whether to normalize the weights
+ * \param[in] rppHandle RPP HOST handle created with <tt>\ref rppCreateWithBatchSize()</tt>
+ * \return A <tt> \ref RppStatus</tt> enumeration.
+ * \retval RPP_SUCCESS Successful completion.
+ * \retval RPP_ERROR* Unsuccessful completion.
+ */
+RppStatus rppt_mel_filter_bank_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32s *srcDims, Rpp32f maxFreq, Rpp32f minFreq, RpptMelScaleFormula melFormula, Rpp32s numFilter, Rpp32f sampleRate, bool normalize, rppHandle_t rppHandle);
+
 /*! \brief Resample augmentation on HOST backend
 * \details Resample augmentation for audio data
 * \param[in] srcPtr source tensor in HOST memory

diff --git a/include/third_party/ffts/ffts.h b/include/third_party/ffts/ffts.h
@@ -0,0 +1,110 @@
+/*
+
+ This file is part of FFTS.
+
+ Copyright (c) 2012, Anthony M. Blake
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+ 	* Redistributions of source code must retain the above copyright
+ 		notice, this list of conditions and the following disclaimer.
+ 	* Redistributions in binary form must reproduce the above copyright
+ 		notice, this list of conditions and the following disclaimer in the
+ 		documentation and/or other materials provided with the distribution.
+ 	* Neither the name of the organization nor the
+	  names of its contributors may be used to endorse or promote products
+ 		derived from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
+ DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#ifndef FFTS_H
+#define FFTS_H
+
+#if defined (_MSC_VER) && (_MSC_VER >= 1020)
+#pragma once
+#endif
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if (defined(_WIN32) || defined(WIN32)) && defined(FFTS_SHARED)
+#  ifdef FFTS_BUILD
+#    define FFTS_API __declspec(dllexport)
+#  else
+#    define FFTS_API __declspec(dllimport)
+#  endif
+#else
+#  if (__GNUC__ >= 4) || defined(HAVE_GCC_VISIBILITY)
+#    define FFTS_API __attribute__ ((visibility("default")))
+#  else
+#    define FFTS_API
+#  endif
+#endif
+
+/* The direction of the transform
+   (i.e, the sign of the exponent in the transform.)
+*/
+#define FFTS_FORWARD (-1)
+#define FFTS_BACKWARD (+1)
+
+struct _ffts_plan_t;
+typedef struct _ffts_plan_t ffts_plan_t;
+
+/* Complex data is stored in the interleaved format
+   (i.e, the real and imaginary parts composing each
+   element of complex data are stored adjacently in memory)
+
+   The multi-dimensional arrays passed are expected to be
+   stored as a single contiguous block in row-major order
+*/
+FFTS_API ffts_plan_t*
+ffts_init_1d(size_t N, int sign);
+
+FFTS_API ffts_plan_t*
+ffts_init_2d(size_t N1, size_t N2, int sign);
+
+FFTS_API ffts_plan_t*
+ffts_init_nd(int rank, size_t *Ns, int sign);
+
+/* For real transforms, sign == FFTS_FORWARD implies a real-to-complex
+   forwards transform, and sign == FFTS_BACKWARD implies a complex-to-real
+   backwards transform.
+
+   The output of a real-to-complex transform is N/2+1 complex numbers,
+   where the redundant outputs have been omitted.
+*/
+FFTS_API ffts_plan_t*
+ffts_init_1d_real(size_t N, int sign);
+
+FFTS_API ffts_plan_t*
+ffts_init_2d_real(size_t N1, size_t N2, int sign);
+
+FFTS_API ffts_plan_t*
+ffts_init_nd_real(int rank, size_t *Ns, int sign);
+
+FFTS_API void
+ffts_execute(ffts_plan_t *p, const void *input, void *output);
+
+FFTS_API void
+ffts_free(ffts_plan_t *p);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* FFTS_H */
diff --git a/include/third_party/ffts/ffts_attributes.h b/include/third_party/ffts/ffts_attributes.h
@@ -0,0 +1,111 @@
+/*
+
+ This file is part of FFTS -- The Fastest Fourier Transform in the South
+
+ Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
+ Copyright (c) 2012, The University of Waikato
+
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+ 	* Redistributions of source code must retain the above copyright
+ 		notice, this list of conditions and the following disclaimer.
+ 	* Redistributions in binary form must reproduce the above copyright
+ 		notice, this list of conditions and the following disclaimer in the
+ 		documentation and/or other materials provided with the distribution.
+ 	* Neither the name of the organization nor the
+	  names of its contributors may be used to endorse or promote products
+ 		derived from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
+ DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#ifndef FFTS_ATTRIBUTES_H
+#define FFTS_ATTRIBUTES_H
+
+#if defined (_MSC_VER) && (_MSC_VER >= 1020)
+#pragma once
+#endif
+
+/* Macro definitions for various function/variable attributes */
+#ifdef __GNUC__
+#define GCC_VERSION_AT_LEAST(x,y) \
+	(__GNUC__ > x || __GNUC__ == x && __GNUC_MINOR__ >= y)
+#else
+#define GCC_VERSION_AT_LEAST(x,y) 0
+#endif
+
+#ifdef __GNUC__
+#define FFTS_ALIGN(x) __attribute__((aligned(x)))
+#elif defined(_MSC_VER)
+#define FFTS_ALIGN(x) __declspec(align(x))
+#else
+#define FFTS_ALIGN(x)
+#endif
+
+#if GCC_VERSION_AT_LEAST(3,1)
+#define FFTS_ALWAYS_INLINE __attribute__((always_inline)) inline
+#elif defined(_MSC_VER)
+#define FFTS_ALWAYS_INLINE __forceinline
+#else
+#define FFTS_ALWAYS_INLINE inline
+#endif
+
+#if defined(_MSC_VER)
+#define FFTS_INLINE __inline
+#else
+#define FFTS_INLINE inline
+#endif
+
+#if defined(__GNUC__)
+#define FFTS_RESTRICT __restrict
+#elif defined(_MSC_VER)
+#define FFTS_RESTRICT __restrict
+#else
+#define FFTS_RESTRICT
+#endif
+
+#if GCC_VERSION_AT_LEAST(4,5)
+#define FFTS_ASSUME(cond) do { if (!(cond)) __builtin_unreachable(); } while (0)
+#elif defined(_MSC_VER)
+#define FFTS_ASSUME(cond) __assume(cond)
+#else
+#define FFTS_ASSUME(cond)
+#endif
+
+#if GCC_VERSION_AT_LEAST(4,7)
+#define FFTS_ASSUME_ALIGNED_16(x) __builtin_assume_aligned(x, 16)
+#else
+#define FFTS_ASSUME_ALIGNED_16(x) x
+#endif
+
+#if GCC_VERSION_AT_LEAST(4,7)
+#define FFTS_ASSUME_ALIGNED_32(x) __builtin_assume_aligned(x, 32)
+#else
+#define FFTS_ASSUME_ALIGNED_32(x) x
+#endif
+
+#if defined(__GNUC__)
+#define FFTS_LIKELY(cond) __builtin_expect(!!(cond), 1)
+#else
+#define FFTS_LIKELY(cond) cond
+#endif
+
+#if defined(__GNUC__)
+#define FFTS_UNLIKELY(cond) __builtin_expect(!!(cond), 0)
+#else
+#define FFTS_UNLIKELY(cond) cond
+#endif
+
+#endif /* FFTS_ATTRIBUTES_H */
diff --git a/libs/third_party/ffts/libffts.a b/libs/third_party/ffts/libffts.a
diff --git a/src/modules/cpu/host_tensor_audio_augmentations.hpp b/src/modules/cpu/host_tensor_audio_augmentations.hpp
@@ -29,6 +29,8 @@ SOFTWARE.
 #include "kernel/to_decibels.hpp"
 #include "kernel/pre_emphasis_filter.hpp"
 #include "kernel/down_mixing.hpp"
+#include "kernel/spectrogram.hpp"
+#include "kernel/mel_filter_bank.hpp"
 #include "kernel/resample.hpp"
 
 #endif // HOST_TENSOR_AUDIO_AUGMENTATIONS_HPP