Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RPP Tensor Audio Support - MelFilterBank #332

Merged
merged 210 commits into from
May 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
210 commits
Select commit Hold shift + click to select a range
6627464
Initial commit - Non slient region detection
snehaa8 Aug 3, 2023
dcd9833
Initial commit - To Decibels
snehaa8 Aug 3, 2023
eb9e6eb
Intial commit - pre_emphasis_filter
HazarathKumarM Aug 5, 2023
7983150
Intial commit - down_mixing
HazarathKumarM Aug 5, 2023
568cefb
Intial commit - slice_audio
HazarathKumarM Aug 5, 2023
2983867
Intial commit - mel_filter_bank
HazarathKumarM Aug 5, 2023
3720c8f
Replace vectors with arrays
snehaa8 Aug 17, 2023
ed1e425
Cleanup
snehaa8 Aug 17, 2023
055fd59
Minor cleanup
snehaa8 Aug 18, 2023
0b93dc2
Optimize downmixing Kernel
snehaa8 Aug 21, 2023
38e5248
Remove unused variables in header file
snehaa8 Aug 21, 2023
9e213c5
Add axes parameter
snehaa8 Aug 23, 2023
f7f51c8
Replace Rpp64s with Rpp32s
snehaa8 Aug 24, 2023
6651256
Replace vectors with arrays
snehaa8 Aug 24, 2023
7f40dd8
Cleanup
snehaa8 Aug 24, 2023
676de7c
Optimize and precompute cutOff
snehaa8 Aug 25, 2023
46b8fb5
Fix buffer used
snehaa8 Sep 4, 2023
11fb3ab
Fix buffer used
snehaa8 Sep 4, 2023
8e05043
Additional Cleanup
snehaa8 Sep 4, 2023
5018e2a
Fix buffer allocation
snehaa8 Sep 4, 2023
41a6b31
Optimize post incrmeent operation
snehaa8 Sep 4, 2023
195ccfc
Optimize post increment operation
snehaa8 Sep 4, 2023
eefa6b2
Optimize post increment operation
snehaa8 Sep 4, 2023
79d9e6f
Update testsuite for Audio
snehaa8 Sep 8, 2023
5719db9
code cleanup
HazarathKumarM Sep 12, 2023
4aad4d6
Add Readme file for Audio test suite
HazarathKumarM Sep 12, 2023
abf15d8
changes based on review comments
HazarathKumarM Sep 12, 2023
71ef2f5
minor change
HazarathKumarM Sep 12, 2023
adad92b
Remove unittest folders and updated README.md
HazarathKumarM Sep 14, 2023
8e01e0e
Remove unit tests
HazarathKumarM Sep 14, 2023
69fba3e
minor change
HazarathKumarM Sep 14, 2023
a7b1b22
code cleanup
sampath1117 Sep 20, 2023
441bfac
added common header file for audio helper functions
sampath1117 Sep 20, 2023
c8803c2
Merge remote-tracking branch 'abishek_rpp/master' into sn/nsr_host_te…
sampath1117 Sep 20, 2023
e7169fe
removed unncessary audio wav files
sampath1117 Sep 20, 2023
11b4709
removed log file
sampath1117 Sep 20, 2023
d64bc7b
added doxygen support for audio
sampath1117 Sep 21, 2023
23ba5a5
Merge branch 'sn/nsr_host_tensor' into sn/to_decibels
sampath1117 Sep 22, 2023
337ccc1
added doxygen changes for to_decibels
sampath1117 Sep 22, 2023
d1b5b41
updated test suite support for to_decibels
sampath1117 Sep 22, 2023
223de61
minor change
sampath1117 Sep 22, 2023
bf0a4e0
added doxygen changes for preemphasis filter
sampath1117 Sep 22, 2023
f7d7589
Merge branch 'sn/to_decibels' into sn/pre_emphasis_filter
sampath1117 Sep 25, 2023
3626fc0
updated changes for preemphasis filter in test suite
sampath1117 Sep 25, 2023
512a79f
removed the usage of getMax function and used std::max_element
sampath1117 Sep 27, 2023
6a18957
modularized code in test suite
sampath1117 Sep 27, 2023
c413836
merge with latest changes
sampath1117 Sep 27, 2023
9f6b6d4
minor change
sampath1117 Sep 27, 2023
2247696
minor change
sampath1117 Sep 27, 2023
1ba334f
Merge branch 'sn/to_decibels' into sn/pre_emphasis_filter
sampath1117 Sep 27, 2023
527ed18
minor change
sampath1117 Sep 27, 2023
87b0138
Merge pull request #149 from snehaa8/sn/nsr_host_tensor
r-abishek Sep 27, 2023
8e2975d
resolved codacy warnings
sampath1117 Sep 28, 2023
b9f8c12
Merge pull request #174 from snehaa8/sn/nsr_host_tensor
r-abishek Sep 28, 2023
b22bf93
Codacy fix - Remove unused cpuTime
r-abishek Sep 28, 2023
4a5c357
CMakeLists - Version Update
kiritigowda Oct 23, 2023
2c8a78b
CHANGELOG Updates
kiritigowda Oct 23, 2023
f07786a
merge with latest changes
sampath1117 Oct 24, 2023
54cfa29
resolved issue with file_system dependency in test suite
sampath1117 Oct 24, 2023
c8bd726
Doxygen changes
sampath1117 Oct 24, 2023
92f8ed7
Merge pull request #182 from sampath1117/sr/nsr_pr_changes
r-abishek Oct 25, 2023
c4d8f3d
Merge branch 'develop' into ar/audio_support_1_non_silent_region
r-abishek Nov 1, 2023
1db1425
RPP RICAP Tensor for HOST and HIP (#213)
r-abishek Nov 1, 2023
1d3e7ce
Documentation - Readme & changelog updates (#251)
LisaDelaney Nov 1, 2023
13995ad
Merge branch 'ar/audio_support_1_non_silent_region' into sn/nsr_host_…
sampath1117 Nov 2, 2023
e85f581
added ctests for audio test suite for CI
sampath1117 Nov 2, 2023
296ed72
Merge pull request #187 from snehaa8/sn/nsr_host_tensor
r-abishek Nov 2, 2023
0aec6e1
Cmake mods for ctest
r-abishek Nov 2, 2023
3140717
HOST-only build error bugfix
r-abishek Nov 2, 2023
fe4ef51
Merge branch 'ar/audio_support_1_non_silent_region' into sn/nsr_host_…
sampath1117 Nov 3, 2023
0713890
added qa mode paramter to python audio script
sampath1117 Nov 3, 2023
e98a4e8
minor change
sampath1117 Nov 3, 2023
a1f7366
Documentation - Bump rocm-docs-core[api_reference] from 0.26.0 to 0.2…
dependabot[bot] Nov 3, 2023
749a552
RPP Resize Mirror Normalize Bugfix (#252)
r-abishek Nov 3, 2023
3f5aec6
Merge pull request #189 from snehaa8/sn/nsr_host_tensor
r-abishek Nov 3, 2023
1b466bc
added example for MMS calculation in comments for better understanding
sampath1117 Nov 7, 2023
38119f3
Sphinx - updates (#257)
kiritigowda Nov 7, 2023
b98bb99
updated info used to for running audio test suite
sampath1117 Nov 8, 2023
3795f37
removed bitdepth variable from audio test suite
sampath1117 Nov 10, 2023
58e1ff5
added more information on computing NSR outputs in the example added
sampath1117 Nov 10, 2023
64c52cd
Merge pull request #191 from snehaa8/sn/nsr_host_tensor
r-abishek Nov 13, 2023
6b2add5
Merge branch 'ar/audio_support_1_non_silent_region' of https://github…
r-abishek Nov 14, 2023
7753fda
Merge branch 'ar/audio_support_2_to_decibels' into sn/to_decibels
r-abishek Nov 14, 2023
072cc1e
Merge pull request #150 from snehaa8/sn/to_decibels
r-abishek Nov 14, 2023
e04371d
Merge branch 'ar/audio_support_1_non_silent_region' of https://github…
r-abishek Nov 14, 2023
1f25169
Merge branch 'ar/audio_support_2_to_decibels' of https://github.com/r…
r-abishek Nov 14, 2023
8e16be8
Merge branch 'ar/audio_support_3_pre_emphasis_filter' into sn/pre_emp…
r-abishek Nov 14, 2023
e261ce3
Merge pull request #151 from snehaa8/sn/pre_emphasis_filter
r-abishek Nov 14, 2023
7bdd889
Merge branch 'ar/audio_support_1_non_silent_region' of https://github…
r-abishek Nov 14, 2023
f7d324f
Merge branch 'master' into sn/down_mixing
r-abishek Nov 14, 2023
0630310
Merge branch 'ar/audio_support_2_to_decibels' of https://github.com/r…
r-abishek Nov 14, 2023
e0cac9f
Merge branch 'ar/audio_support_3_pre_emphasis_filter' of https://gith…
r-abishek Nov 14, 2023
d205055
Fix doxygen for decibels
snehaa8 Nov 15, 2023
f9c66a6
Merge pull request #195 from snehaa8/sn/to_decibels
r-abishek Nov 15, 2023
d0d0de1
move tensor_host_audio.cpp to host folder
SundarRajan28 Nov 16, 2023
415ee7b
Merge ar/audio_support_3_pre_emphasis_filter to sn/down_mixing
SundarRajan28 Nov 16, 2023
da528d3
Fix build errors and qa tests in Audio Test suite
snehaa8 Nov 16, 2023
ce13b82
Fix build errors and qa tests in Audio Test suite
snehaa8 Nov 16, 2023
bd81492
Merge remote-tracking branch 'abishek/ar/audio_support_4_down_mixing'…
snehaa8 Nov 16, 2023
a90e280
Merge pull request #197 from snehaa8/sn/to_decibels
r-abishek Nov 16, 2023
5713d1d
Merge branch 'ar/audio_support_2_to_decibels' of https://github.com/r…
r-abishek Nov 16, 2023
8ae8673
Merge branch 'ar/audio_support_3_pre_emphasis_filter' into sn/pre_emp…
r-abishek Nov 16, 2023
9c8ac7f
Merge pull request #198 from snehaa8/sn/pre_emphasis_filter
r-abishek Nov 16, 2023
1b943cf
Add reference output and test samples for downmix
snehaa8 Nov 20, 2023
31921cd
Merge branch 'sn/pre_emphasis_filter' into sn/down_mixing
snehaa8 Nov 20, 2023
21653af
Add down_mix in augmentation list and supported cases
snehaa8 Nov 20, 2023
5c4af0c
move Tensor_host_audio.cpp to host folder
snehaa8 Nov 20, 2023
af8cea3
Merge branch 'sn/down_mixing' into sn/slice_audio
snehaa8 Nov 20, 2023
47bfcd2
fix qa mismatches
snehaa8 Nov 20, 2023
7e073f1
move Tensor_host_audio.cpp to host folder
snehaa8 Nov 20, 2023
e500ffc
Merge slice_audio branch
snehaa8 Nov 20, 2023
efcab0d
fix qa mismatches
snehaa8 Nov 20, 2023
8227726
Merge branch 'master' of https://github.com/GPUOpen-ProfessionalCompu…
r-abishek Nov 22, 2023
783ee98
Remove auto-merge repeated funcs
r-abishek Nov 22, 2023
900672a
Merge branch 'ar/audio_support_2_to_decibels' of https://github.com/r…
r-abishek Nov 22, 2023
7e91a91
Improve clarity of header docs
r-abishek Nov 22, 2023
3b03aad
Remove blank line
r-abishek Nov 22, 2023
2be82b1
Improve clarity on header docs
r-abishek Nov 22, 2023
934276a
Merge branch 'ar/audio_support_2_to_decibels' of https://github.com/r…
r-abishek Nov 22, 2023
433d3f2
Merge latest changes from previousPR
snehaa8 Nov 27, 2023
be61650
Add Doxygen comments
snehaa8 Nov 27, 2023
c44b703
Merge latest changes from previousPR
snehaa8 Nov 27, 2023
6d72867
Add Doxygen comments
snehaa8 Nov 27, 2023
b1b1484
Merge latest changes from previousPR
snehaa8 Nov 27, 2023
5a93889
Add Doxygen comments
snehaa8 Nov 27, 2023
56dce62
minor change
snehaa8 Nov 27, 2023
7a2334b
merge with master
sampath1117 Dec 29, 2023
28a99be
converted golden outputs to binary file for downmixing
sampath1117 Dec 29, 2023
cb082d9
removed old golden output file for preemphasis and todecibels
sampath1117 Jan 5, 2024
e6a3ec3
modified info for downmixing as per new changes
sampath1117 Jan 15, 2024
d37fbe8
formatting changes
sampath1117 Jan 23, 2024
ee5eb16
Merge branch 'master' into sn/down_mixing
sampath1117 Jan 23, 2024
2d67580
Initial commit - Spectrogram
snehaa8 Jan 24, 2024
334b425
Add QA .bin reference file
snehaa8 Jan 29, 2024
a1f4213
License - updates to 2024 and consistency changes (#298)
r-abishek Jan 31, 2024
7096c1d
Test - Update README.md for test_suite (#299)
r-abishek Jan 31, 2024
2f9a611
Address internal review comments
snehaa8 Feb 5, 2024
05dc4fd
Modify cmakelist
snehaa8 Feb 5, 2024
fe1f475
Fix QA mismatch
snehaa8 Feb 5, 2024
07a5f66
Bump rocm-docs-core[api_reference] from 0.33.0 to 0.33.1 in /docs/sph…
dependabot[bot] Feb 6, 2024
a5e5679
Bump rocm-docs-core[api_reference] from 0.33.1 to 0.33.2 in /docs/sph…
dependabot[bot] Feb 7, 2024
e8aa6b2
Update doc codeowners (#303)
samjwu Feb 8, 2024
a921332
Documentation - Bump rocm-docs-core[api_reference] from 0.33.2 to 0.3…
dependabot[bot] Feb 9, 2024
30bed4e
Test suite - upgrade 5 qa perf (#305)
kiritigowda Feb 9, 2024
5c423ab
RPP Color Temperature on HOST and HIP (#271)
r-abishek Feb 9, 2024
df6e2c9
RPP Voxel 3D Tensor Add/Subtract scalar on HOST and HIP (#272)
r-abishek Feb 9, 2024
a4ed137
RPP Magnitude on HOST and HIP (#278)
r-abishek Feb 14, 2024
1976cbf
Bump rocm-docs-core[api_reference] from 0.34.0 to 0.34.2 in /docs/sph…
dependabot[bot] Feb 16, 2024
ec8f2f0
RPP Tensor Audio Support - Down Mixing (#296)
r-abishek Feb 16, 2024
29a5c82
RPP Voxel 3D Tensor Multiply scalar on HOST and HIP (#306)
r-abishek Feb 16, 2024
98a3c82
Test Suite Bugfix (#307)
r-abishek Feb 16, 2024
3d54baf
Merge branch 'develop' of https://github.com/r-abishek/rpp into sn/au…
r-abishek Feb 22, 2024
b9a011d
Merge pull request #223 from snehaa8/sn/audio_spectrogram
r-abishek Feb 22, 2024
203ea70
Fix build errors on OCL backend
snehaa8 Feb 23, 2024
0bc1112
Merge pull request #238 from snehaa8/sn/audio_spectrogram
r-abishek Feb 23, 2024
d375376
Merge branch 'sn/mel_filter_bank' into sn/audio_spectrogram
snehaa8 Feb 29, 2024
b16d1dc
Merge pull request #3 from snehaa8/sn/audio_spectrogram
snehaa8 Feb 29, 2024
0a3e31b
Fix spectrogram
snehaa8 Feb 29, 2024
b821498
Cleanup
snehaa8 Mar 1, 2024
2f00442
Merge remote-tracking branch 'origin' into sn/audio_spectrogram_maste…
snehaa8 Mar 6, 2024
8ca76b1
Fix build error in tensor testsuite
snehaa8 Mar 6, 2024
e817bc1
Merge pull request #245 from snehaa8/sn/audio_spectrogram_master_merge
r-abishek Mar 6, 2024
8ed0f65
Merge branch 'master' of https://github.com/GPUOpen-ProfessionalCompu…
r-abishek Mar 6, 2024
c33af22
Bump rocm-docs-core[api_reference] from 0.35.0 to 0.35.1 in /docs/sph…
dependabot[bot] Mar 6, 2024
7a4fbed
Merge branch 'master' into sn/mel_filter_bank
snehaa8 Mar 8, 2024
14f6334
Bump rocm-docs-core[api_reference] from 0.35.1 to 0.36.0 in /docs/sph…
dependabot[bot] Mar 12, 2024
95c3272
Merge branch 'master' into develop
kiritigowda Mar 12, 2024
da7b501
Merge branch 'ROCm:master' into ar/audio_support_6_spectrogram
r-abishek Mar 15, 2024
ae5f7e9
Merge branch 'develop' of https://github.com/ROCm/rpp into ar/audio_s…
r-abishek Mar 16, 2024
ae7f053
Merge branch 'ar/audio_support_6_spectrogram' of https://github.com/r…
r-abishek Mar 16, 2024
641f653
Docs - Bump rocm-docs-core[api_reference] from 0.36.0 to 0.37.0 in /d…
dependabot[bot] Mar 20, 2024
5568573
Link cleanup (#326)
LisaDelaney Mar 20, 2024
481d5d5
Change to camelCase for variable naming
snehaa8 Mar 20, 2024
2cd88f0
Cleanup testsuite for MFB
snehaa8 Mar 20, 2024
a6749ba
Update notes
LisaDelaney Mar 20, 2024
e126aec
Address review comments
snehaa8 Mar 21, 2024
8b28828
Revert change in runTests.py
snehaa8 Mar 21, 2024
82fae0b
Modified codes to use handle memory
snehaa8 Mar 21, 2024
a255906
Docs - Bump rocm-docs-core[api_reference] from 0.37.0 to 0.37.1 in /d…
dependabot[bot] Mar 22, 2024
49efcb6
Merge pull request #251 from snehaa8/sn/spectrogram_address_review_co…
r-abishek Mar 22, 2024
d3df761
RPP Voxel Flip on HIP and HOST (#285)
r-abishek Mar 23, 2024
ebecb42
RPP Vignette Tensor on HOST and HIP (#311)
r-abishek Mar 23, 2024
1e7df2a
Improve readability and cleanup
snehaa8 Mar 25, 2024
1a99bbf
Replace memset with fill for setting non zero integers
snehaa8 Mar 26, 2024
0a5ff5e
Merge pull request #242 from snehaa8/sn/mel_filter_bank
r-abishek Mar 27, 2024
fc1410b
Bump rocm-docs-core[api_reference] from 0.37.1 to 0.38.0 in /docs/sph…
dependabot[bot] Mar 27, 2024
3d1903c
Move independent computes outside loop
snehaa8 Apr 1, 2024
d95baee
Use memset instead of manually filling values and move this outside loop
snehaa8 Apr 1, 2024
7ad7ab0
Replace c style cast with static cast
snehaa8 Apr 1, 2024
1ac9504
Revert "Use memset instead of manually filling values and move this o…
snehaa8 Apr 1, 2024
3ebd7c3
RPP Tensor Audio Support - Resample (#310)
r-abishek Apr 3, 2024
76f31df
Docs - Missing input and output images for Doxygen (#331)
r-abishek Apr 3, 2024
b83f910
Scratch buffers rename for HOST and HIP (#324)
r-abishek Apr 3, 2024
ebeb131
Update CMakeLists.txt
kiritigowda Apr 3, 2024
347d059
Merge pull request #262 from snehaa8/sn/spectrogram_address_review_co…
r-abishek Apr 3, 2024
cdad921
Merge branch 'develop' into ar/audio_support_6_spectrogram
r-abishek Apr 3, 2024
3d9f0fd
Merge branch 'develop' into ar/audio_support_8_mel_filter_bank
r-abishek Apr 3, 2024
3df1c99
Minor build fix
r-abishek Apr 3, 2024
1147bfe
Update CMakeLists.txt
kiritigowda Apr 12, 2024
fb949e0
Merge branch 'develop' of https://github.com/GPUOpen-ProfessionalComp…
r-abishek Apr 12, 2024
29b185f
Merge branch 'sn/spectrogram_address_review_comments' into sn/mel_fil…
snehaa8 Apr 16, 2024
86113ba
Introduce NFT and NTF layouts for audio kernels
snehaa8 Apr 17, 2024
1f1481d
Set layout for spectrogram and melfilterbank directly in testsuite
snehaa8 Apr 17, 2024
9651409
Remove extra blank line in testsuite
snehaa8 Apr 17, 2024
c902871
Merge pull request #264 from snehaa8/sn/mel_filter_bank
r-abishek Apr 17, 2024
0cd8691
Merge branch 'develop' of https://github.com/ROCm/rpp into ar/audio_s…
r-abishek May 7, 2024
1613aa6
Add parentheses
r-abishek May 8, 2024
5718e73
Optimizations and cleanup
r-abishek May 8, 2024
0ecb859
Merge branch 'develop' of https://github.com/ROCm/rpp into ar/audio_s…
r-abishek May 8, 2024
90c6fdd
Merge branch 'develop' into ar/audio_support_8_mel_filter_bank
r-abishek May 8, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions include/rppdefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -369,10 +369,13 @@ typedef enum
*/
typedef enum
{
    NCHW,   // BatchSize-Channels-Height-Width
    NHWC,   // BatchSize-Height-Width-Channels
    NCDHW,  // BatchSize-Channels-Depth-Height-Width
    NDHWC,  // BatchSize-Depth-Height-Width-Channels
    NHW,    // BatchSize-Height-Width
    NFT,    // BatchSize-Frequency-Time -> Frequency Major used for Spectrogram / MelfilterBank
    NTF     // BatchSize-Time-Frequency -> Time Major used for Spectrogram / MelfilterBank
} RpptLayout;

/*! \brief RPPT Tensor 2D ROI type enum
Expand Down Expand Up @@ -434,6 +437,15 @@ typedef enum
TF, //Time Major
} RpptSpectrogramLayout;

/*! \brief RPPT Mel Scale Formula
 * \ingroup group_rppdefs
 * \details Selects the formula used to convert frequencies from hertz to mel and from mel to hertz.
 */
typedef enum
{
SLANEY = 0, // Follows Slaney's MATLAB Auditory Modelling Work behavior
HTK, // Follows O'Shaughnessy's book formula, consistent with Hidden Markov Toolkit (HTK): m = 2595 * log10(1 + (f / 700))
} RpptMelScaleFormula;

/*! \brief RPPT Tensor 2D ROI LTRB struct
* \ingroup group_rppdefs
*/
Expand Down
20 changes: 20 additions & 0 deletions include/rppt_tensor_audio_augmentations.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,26 @@ RppStatus rppt_down_mixing_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_
*/
RppStatus rppt_spectrogram_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32s *srcLengthTensor, bool centerWindows, bool reflectPadding, Rpp32f *windowFunction, Rpp32s nfft, Rpp32s power, Rpp32s windowLength, Rpp32s windowStep, RpptSpectrogramLayout layout, rppHandle_t rppHandle);

/*! \brief Mel filter bank augmentation HOST backend
 * \details Mel filter bank augmentation for audio data. Applies numFilter mel filters to each spectrogram of the batch.
 * \param[in] srcPtr source tensor in HOST memory
 * \param[in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32, layout - NFT)
 * \param[out] dstPtr destination tensor in HOST memory
 * \param[in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32, layout - NFT)
 * \param[in] srcDimsTensor per-sample source dimensions, stored as pairs { number of frequency bins, number of frames } (1D tensor in HOST memory, of size batchSize * 2)
 * \param[in] maxFreq maximum frequency (NOTE - the current HOST kernel does not read this value and always uses sampleRate / 2)
 * \param[in] minFreq minimum frequency
 * \param[in] melFormula formula used to convert frequencies from hertz to mel and from mel to hertz (SLANEY / HTK)
 * \param[in] numFilter number of mel filters
 * \param[in] sampleRate sampling rate of the audio
 * \param[in] normalize boolean variable that determines whether the filter weights are normalized or not
 * \param[in] rppHandle RPP HOST handle created with <tt>\ref rppCreateWithBatchSize()</tt>
 * \return A <tt> \ref RppStatus</tt> enumeration.
 * \retval RPP_SUCCESS Successful completion.
 * \retval RPP_ERROR* Unsuccessful completion.
 */
RppStatus rppt_mel_filter_bank_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32s *srcDimsTensor, Rpp32f maxFreq, Rpp32f minFreq, RpptMelScaleFormula melFormula, Rpp32s numFilter, Rpp32f sampleRate, bool normalize, rppHandle_t rppHandle);

/*! \brief Resample augmentation on HOST backend
* \details Resample augmentation for audio data
* \param[in] srcPtr source tensor in HOST memory
Expand Down
1 change: 1 addition & 0 deletions src/modules/cpu/host_tensor_audio_augmentations.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ SOFTWARE.
#include "kernel/pre_emphasis_filter.hpp"
#include "kernel/down_mixing.hpp"
#include "kernel/spectrogram.hpp"
#include "kernel/mel_filter_bank.hpp"
#include "kernel/resample.hpp"

#endif // HOST_TENSOR_AUDIO_AUGMENTATIONS_HPP
252 changes: 252 additions & 0 deletions src/modules/cpu/kernel/mel_filter_bank.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,252 @@
/*
MIT License

Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

#include "rppdefs.h"
#include "rpp_cpu_simd.hpp"
#include "rpp_cpu_common.hpp"

// Abstract hertz <-> mel converter. Concrete scales implement both directions;
// the virtual destructor keeps destruction through a BaseMelScale pointer well defined.
struct BaseMelScale
{
    virtual Rpp32f hz_to_mel(Rpp32f hz) = 0;     // frequency in hertz -> mel value
    virtual Rpp32f mel_to_hz(Rpp32f mel) = 0;    // mel value -> frequency in hertz
    virtual ~BaseMelScale() = default;
};

// HTK mel scale: mel = 1127 * ln(1 + hz / 700), together with its exact inverse.
struct HtkMelScale : public BaseMelScale
{
    // `override` makes the compiler reject any signature drift from BaseMelScale
    Rpp32f hz_to_mel(Rpp32f hz) override { return 1127.0f * std::log(1.0f + (hz / 700.0f)); }
    Rpp32f mel_to_hz(Rpp32f mel) override { return 700.0f * (std::exp(mel / 1127.0f) - 1.0f); }
    // No user-declared destructor needed: the implicit one overrides the virtual base destructor
};

// Slaney mel scale: linear in hertz below minLogHz and logarithmic at or above it.
struct SlaneyMelScale : public BaseMelScale
{
    const Rpp32f freqLow = 0;                                   // lower edge of the linear region, in hertz
    const Rpp32f fsp = 200.0 / 3.0;                             // hertz per mel inside the linear region
    const Rpp32f minLogHz = 1000.0;                             // frequency at which the log region starts
    const Rpp32f minLogMel = (minLogHz - freqLow) / fsp;        // mel value at which the log region starts
    const Rpp32f stepLog = 0.068751777;                         // Equivalent to std::log(6.4) / 27.0;

    // Reciprocals precomputed so the conversions multiply instead of divide
    const Rpp32f invMinLogHz = 1.0f / 1000.0;
    const Rpp32f invStepLog = 1.0f / stepLog;
    const Rpp32f invFsp = 1.0f / fsp;

    // Converts a frequency in hertz to its mel value
    Rpp32f hz_to_mel(Rpp32f hz) override
    {
        Rpp32f mel = 0.0f;
        if (hz >= minLogHz)
            mel = minLogMel + std::log(hz * invMinLogHz) * invStepLog;
        else
            mel = (hz - freqLow) * invFsp;

        return mel;
    }

    // Converts a mel value back to a frequency in hertz (inverse of hz_to_mel)
    Rpp32f mel_to_hz(Rpp32f mel) override
    {
        Rpp32f hz = 0.0f;
        if (mel >= minLogMel)
            hz = minLogHz * std::exp(stepLog * (mel - minLogMel));
        else
            hz = freqLow + mel * fsp;
        return hz;
    }
    // No user-declared destructor needed: the implicit one overrides the virtual base destructor
};

/* Applies a bank of numFilter triangular mel filters to every spectrogram of the batch (HOST, F32).
 *
 * srcPtr / dstPtr    : batch buffers; sample b starts at b * strides.nStride and each row is strides.hStride apart.
 *                      Source rows are FFT bins, destination rows are mel filters; columns are frames.
 * srcDimsTensor      : per-sample pairs - [2 * b] is read as the number of FFT bins, [2 * b + 1] as the number of frames.
 * maxFreqVal         : not read by this kernel; the upper frequency is always sampleRate / 2.
 * minFreqVal         : lower frequency bound in hertz.
 * melFormula         : HTK selects HtkMelScale, every other value selects SlaneyMelScale.
 * numFilter          : number of mel filters (output rows per sample).
 * sampleRate         : sampling rate used to convert FFT bin indices to hertz.
 * normalize          : when true each filter is scaled by 2 / (f2 - f0), with f0 / f2 the outer edges of its triangle.
 * handle             : supplies the thread count and the HOST scratch buffer that holds the per-sample
 *                      normFactors, weightsDown and intervals tables.
 *
 * Always returns RPP_SUCCESS.
 *
 * NOTE(review): the scratch offsets of weightsDown / intervals are derived from the per-sample numBins, so the
 * per-sample regions look disjoint only when every sample in the batch has the same number of bins - confirm
 * against the callers and the scratch buffer sizing.
 */
RppStatus mel_filter_bank_host_tensor(Rpp32f *srcPtr,
                                      RpptDescPtr srcDescPtr,
                                      Rpp32f *dstPtr,
                                      RpptDescPtr dstDescPtr,
                                      Rpp32s *srcDimsTensor,
                                      Rpp32f maxFreqVal,    // check unused
                                      Rpp32f minFreqVal,
                                      RpptMelScaleFormula melFormula,
                                      Rpp32s numFilter,
                                      Rpp32f sampleRate,
                                      bool normalize,
                                      rpp::Handle& handle)
{
    // Both converters live on the stack, so there is no heap allocation to release on any exit path
    HtkMelScale htkMelScale;
    SlaneyMelScale slaneyMelScale;
    BaseMelScale *melScalePtr = &slaneyMelScale;    // SLANEY is also the fallback for unrecognised values
    if (melFormula == RpptMelScaleFormula::HTK)
        melScalePtr = &htkMelScale;

    Rpp32u numThreads = handle.GetNumThreads();
    Rpp32u batchSize = srcDescPtr->n;
    Rpp32f *scratchMem = handle.GetInitHandle()->mem.mcpu.scratchBufferHost;

    Rpp32f maxFreq = sampleRate / 2;
    Rpp32f minFreq = minFreqVal;

    // Convert lower, higher frequencies to mel scale and find melStep
    Rpp64f melLow = melScalePtr->hz_to_mel(minFreq);
    Rpp64f melHigh = melScalePtr->hz_to_mel(maxFreq);
    Rpp64f melStep = (melHigh - melLow) / (numFilter + 1);

    omp_set_dynamic(0);
#pragma omp parallel for num_threads(numThreads)
    for(int batchCount = 0; batchCount < batchSize; batchCount++)
    {
        Rpp32f *srcPtrTemp = srcPtr + batchCount * srcDescPtr->strides.nStride;
        Rpp32f *dstPtrTemp = dstPtr + batchCount * dstDescPtr->strides.nStride;

        // Extract nfft, number of Frames, numBins
        // These are computed inside the batch loop because they depend on batchCount -
        // they can be different for different elements in the batch.
        Rpp32s nfft = (srcDimsTensor[batchCount * 2] - 1) * 2;
        Rpp32s numBins = nfft / 2 + 1;
        Rpp32s numFrames = srcDimsTensor[batchCount * 2 + 1];

        // Find hzStep
        Rpp64f hzStep = static_cast<Rpp64f>(sampleRate) / nfft;
        Rpp64f invHzStep = 1.0 / hzStep;

        // Find fftBinStart and fftBinEnd
        Rpp32s fftBinStart = std::ceil(minFreq * invHzStep);
        Rpp32s fftBinEnd = std::ceil(maxFreq * invHzStep);
        fftBinEnd = std::min(fftBinEnd, numBins);

        // Set/Fill normFactors, weightsDown and intervals
        Rpp32f *normFactors = scratchMem + (batchCount * numFilter);
        std::fill(normFactors, normFactors + numFilter, 1.f);       // normFactors contain numFilter values of type float
        Rpp32f *weightsDown = scratchMem + (batchSize * numFilter) + (batchCount * numBins);
        // Clear all numBins entries. The previous memset used sizeof(numBins * sizeof(Rpp32f)),
        // i.e. the size of a size_t, and therefore cleared only the first few bytes.
        std::fill(weightsDown, weightsDown + numBins, 0.0f);        // weightsDown contain numBins values of type float
        Rpp32s *intervals = reinterpret_cast<Rpp32s *>(weightsDown + (batchSize * numBins));
        std::fill(intervals, intervals + numBins, -1);              // intervals contain numBins values of type integer

        // Walk the numFilter + 1 mel intervals. Every FFT bin whose frequency falls in [f0, f1) records
        // the interval index and the weight of the descending slope at that frequency.
        Rpp32s fftBin = fftBinStart;
        Rpp64f mel0 = melLow, mel1 = melLow + melStep;
        Rpp64f fIter = fftBin * hzStep;
        for (int interval = 0; interval < numFilter + 1; interval++, mel0 = mel1, mel1 += melStep)
        {
            Rpp64f f0 = melScalePtr->mel_to_hz(mel0);
            Rpp64f f1 = melScalePtr->mel_to_hz(interval == numFilter ? melHigh : mel1);
            Rpp64f slope = 1. / (f1 - f0);

            if (normalize && interval < numFilter)
            {
                Rpp64f f2 = melScalePtr->mel_to_hz(mel1 + melStep);
                normFactors[interval] = 2.0 / (f2 - f0);
            }

            for (; fftBin < fftBinEnd && fIter < f1; fftBin++, fIter = fftBin * hzStep)
            {
                weightsDown[fftBin] = (f1 - fIter) * slope;
                intervals[fftBin] = interval;
            }
        }

        Rpp32u maxFrames = std::min(static_cast<Rpp32u>(numFrames + 8), dstDescPtr->strides.hStride);
        Rpp32u maxAlignedLength = maxFrames & ~7;
        Rpp32u vectorIncrement = 8;

        // Set ROI values in dst buffer to 0.0
        for(int i = 0; i < numFilter; i++)
        {
            Rpp32f *dstPtrRow = dstPtrTemp + i * dstDescPtr->strides.hStride;
            Rpp32u vectorLoopCount = 0;
            for(; vectorLoopCount < maxAlignedLength; vectorLoopCount += 8)
            {
                _mm256_storeu_ps(dstPtrRow, avx_p0);
                dstPtrRow += 8;
            }
            for(; vectorLoopCount < maxFrames; vectorLoopCount++)
                *dstPtrRow++ = 0.0f;
        }

        Rpp32u alignedLength = numFrames & ~7;

        // dstRow[0 .. numFrames) += weight * srcRow[0 .. numFrames): 8 floats per AVX step, scalar tail
        auto accumulateRow = [&](Rpp32f *dstRowPtrTemp, const Rpp32f *srcRowPtrTemp, Rpp32f weight)
        {
            __m256 pWeight = _mm256_set1_ps(weight);
            int vectorLoopCount = 0;
            for(; vectorLoopCount < alignedLength; vectorLoopCount += vectorIncrement)
            {
                __m256 pSrc = _mm256_loadu_ps(srcRowPtrTemp);
                pSrc = _mm256_mul_ps(pSrc, pWeight);
                __m256 pDst = _mm256_loadu_ps(dstRowPtrTemp);
                pDst = _mm256_add_ps(pDst, pSrc);
                _mm256_storeu_ps(dstRowPtrTemp, pDst);
                dstRowPtrTemp += vectorIncrement;
                srcRowPtrTemp += vectorIncrement;
            }

            for (; vectorLoopCount < numFrames; vectorLoopCount++)
                (*dstRowPtrTemp++) += weight * (*srcRowPtrTemp++);
        };

        // A bin lying in interval k feeds the descending slope of filter k - 1 and the rising slope of filter k
        Rpp32f *srcRowPtr = srcPtrTemp + fftBinStart * srcDescPtr->strides.hStride;
        for (int64_t bin = fftBinStart; bin < fftBinEnd; bin++)
        {
            Rpp32s filterUp = intervals[bin];
            Rpp32f weightUp = 1.0f - weightsDown[bin];
            Rpp32s filterDown = filterUp - 1;
            Rpp32f weightDown = weightsDown[bin];

            if (filterDown >= 0)
            {
                if (normalize)
                    weightDown *= normFactors[filterDown];
                accumulateRow(dstPtrTemp + filterDown * dstDescPtr->strides.hStride, srcRowPtr, weightDown);
            }

            if (filterUp >= 0 && filterUp < numFilter)
            {
                if (normalize)
                    weightUp *= normFactors[filterUp];
                accumulateRow(dstPtrTemp + filterUp * dstDescPtr->strides.hStride, srcRowPtr, weightUp);
            }

            srcRowPtr += srcDescPtr->strides.hStride;
        }
    }

    return RPP_SUCCESS;
}
40 changes: 40 additions & 0 deletions src/modules/rppt_tensor_audio_augmentations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,46 @@ RppStatus rppt_spectrogram_host(RppPtr_t srcPtr,
}
}

/******************** mel_filter_bank ********************/

// HOST entry point for the mel filter bank: validates the descriptors and forwards to the F32 kernel.
// Returns RPP_ERROR_INVALID_SRC_LAYOUT / RPP_ERROR_INVALID_DST_LAYOUT when a descriptor is not NFT,
// RPP_ERROR_NOT_IMPLEMENTED unless both tensors are F32, and RPP_SUCCESS otherwise.
RppStatus rppt_mel_filter_bank_host(RppPtr_t srcPtr,
                                    RpptDescPtr srcDescPtr,
                                    RppPtr_t dstPtr,
                                    RpptDescPtr dstDescPtr,
                                    Rpp32s* srcDimsTensor,
                                    Rpp32f maxFreq,
                                    Rpp32f minFreq,
                                    RpptMelScaleFormula melFormula,
                                    Rpp32s numFilter,
                                    Rpp32f sampleRate,
                                    bool normalize,
                                    rppHandle_t rppHandle)
{
    // Layout is checked first, source before destination
    if (srcDescPtr->layout != RpptLayout::NFT)
        return RPP_ERROR_INVALID_SRC_LAYOUT;
    if (dstDescPtr->layout != RpptLayout::NFT)
        return RPP_ERROR_INVALID_DST_LAYOUT;

    // Only the F32 -> F32 variant exists
    const bool isF32ToF32 = (srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32);
    if (!isF32ToF32)
        return RPP_ERROR_NOT_IMPLEMENTED;

    Rpp32f *srcPtrF32 = static_cast<Rpp32f*>(srcPtr);
    Rpp32f *dstPtrF32 = static_cast<Rpp32f*>(dstPtr);
    mel_filter_bank_host_tensor(srcPtrF32,
                                srcDescPtr,
                                dstPtrF32,
                                dstDescPtr,
                                srcDimsTensor,
                                maxFreq,
                                minFreq,
                                melFormula,
                                numFilter,
                                sampleRate,
                                normalize,
                                rpp::deref(rppHandle));
    return RPP_SUCCESS;
}

/******************** resample ********************/

RppStatus rppt_resample_host(RppPtr_t srcPtr,
Expand Down
Loading