Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Audio PR - Augmentation support [ Spectrogram ] #1355

Merged
merged 68 commits into from
May 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
68 commits
Select commit Hold shift + click to select a range
83d5cbf
Bump rocm-docs-core[api_reference] from 0.34.0 to 0.34.2 in /docs/sph…
dependabot[bot] Feb 16, 2024
886684e
Bump cryptography from 42.0.0 to 42.0.2 in /docs/sphinx (#1289)
dependabot[bot] Feb 18, 2024
cf47a7f
Merge branch 'develop' of https://github.com/ROCm/MIVisionX into develop
Mar 13, 2024
f196208
Add PreEmphasis filter support
Mar 13, 2024
1e89c02
Fix ROI - change from xy to width & height
swetha097 Mar 13, 2024
d53f81d
Adding openvx changes for downmix node
SundarRajan28 Mar 13, 2024
9cef1ab
Audio Augmentations 1 PR - NSR and Spectrogram
swetha097 Mar 14, 2024
43dbde3
Revert "Adding openvx changes for downmix node"
swetha097 Mar 14, 2024
b19a9ad
PR comments resolution in the PreEmphais Filter
swetha097 Mar 19, 2024
e92ffe4
Minor Changes
swetha097 Mar 20, 2024
b94144c
Change the borderType enum to int32 from uint32 dtype
swetha097 Mar 21, 2024
5808d6b
Fix validation of preemphasis
swetha097 Mar 21, 2024
33eb1ce
Remove the memcopy of the src and dest rois as it can be handled in t…
swetha097 Mar 21, 2024
8c4a98e
Formatting change - minor
swetha097 Mar 21, 2024
a58818e
Remove NSR
swetha097 Mar 22, 2024
e76b212
Merge branch 'swbs_m1/audio/pr3' into swbs_m2/audio/pr5
swetha097 Mar 22, 2024
c6dcd6b
Minor formatting changes
swetha097 Mar 22, 2024
4e3654a
Minor fix
swetha097 Mar 22, 2024
e197789
Minor update - remove the 2nd instance of preemphasis filter
swetha097 Mar 22, 2024
dba24ba
Enum dtype - change from uint to int
swetha097 Mar 22, 2024
6615b27
Remove roi_tensor_ptr_dst as its unused after latest changes
swetha097 Mar 22, 2024
84100bd
Remove the dst_roi arg from vxExtRppPreemphasisFilter call as its unused
swetha097 Mar 22, 2024
dc4200b
Add MFB to MIVisisonX
swetha097 Mar 25, 2024
8c50b93
Revert "Add MFB to MIVisisonX"
swetha097 Mar 25, 2024
0bc6e73
Merge branch 'swbs_m1/audio/pr3' into swbs_m2/audio/pr5
swetha097 Mar 25, 2024
854aa50
Resolve the PR comments
swetha097 Mar 25, 2024
886d6af
Change the dims[0] and dims[1] positioning for Spectrogram and AudioF…
swetha097 Mar 25, 2024
86aa2d3
Change function name to camelCase
swetha097 Mar 25, 2024
704807f
Revert "Change the dims[0] and dims[1] positioning for Spectrogram an…
swetha097 Mar 26, 2024
e766dff
Fix Spectrogram
Mar 26, 2024
7f9e423
Merge branch 'swbs_m2/audio/pr5' of https://github.com/swetha097/MIVi…
Mar 26, 2024
dd6eceb
Docs - update TOC for API Ref (#1327)
randyh62 Apr 9, 2024
724ebbf
Bump rocm-docs-core[api_reference] from 0.38.0 to 0.38.1 in /docs/sph…
dependabot[bot] Apr 11, 2024
a592da3
Update CHANGELOG.md
swetha097 Apr 11, 2024
d15cd67
Merge remote-tracking branch 'swe_fork/swbs_m1/audio/pr3' into swbs_m…
swetha097 Apr 11, 2024
3252e48
Update CHANGELOG.md
swetha097 Apr 11, 2024
f387e49
Documents - Bump idna from 3.4 to 3.7 in /docs/sphinx (#1330)
dependabot[bot] Apr 12, 2024
3a09b7f
Merge remote-tracking branch 'upstream/develop' into swbs_m1/audio/pr3
SundarRajan28 Apr 17, 2024
ca43743
Merge branch 'swbs_m1/audio/pr3' into swbs_m2/audio/pr5
SundarRajan28 Apr 17, 2024
d0691b6
Update changelog
SundarRajan28 Apr 17, 2024
f46e48e
Merge branch 'swbs_m1/audio/pr3' into swbs_m2/audio/pr5
SundarRajan28 Apr 17, 2024
80b55a5
Merge remote-tracking branch 'upstream/develop' into HEAD
swetha097 Apr 18, 2024
1e92a6c
Merge branch 'develop' into swbs_m1/audio/pr3
swetha097 Apr 18, 2024
83991ea
Resolve minor PR comments
swetha097 Apr 18, 2024
731d7a6
Merge branch 'swbs_m1/audio/pr3' into swbs_m2/audio/pr5
swetha097 Apr 18, 2024
5112ca8
Remove comments
swetha097 Apr 18, 2024
5532aae
Docs - Bump tqdm from 4.65.0 to 4.66.3 in /docs/sphinx (#1339)
dependabot[bot] May 3, 2024
bad8d31
Docs - Bump jinja2 from 3.1.3 to 3.1.4 in /docs/sphinx (#1340)
dependabot[bot] May 6, 2024
7ef3c3c
Find Half - Fix (#1341)
kiritigowda May 7, 2024
2358357
MIVisionX Setup - Updates (#1343)
kiritigowda May 8, 2024
7e8fbf5
Merge remote-tracking branch 'upstream/develop' into swbs_m2/audio/pr5
swetha097 May 8, 2024
cab620b
SWDEV-459739 - Remove the package obsolete setting (#1345)
raramakr May 9, 2024
efdc885
Fix the layout issue with spec
swetha097 May 9, 2024
b4f93d1
Add layouts for Audio in vxTensorLayout
fiona-gladwin May 9, 2024
1d52b52
Merge branch 'swbs_m2/audio/pr5' of https://github.com/swetha097/MIVi…
fiona-gladwin May 9, 2024
212df74
Check the validity of pointers
swetha097 May 9, 2024
fccd77d
Audio PR - Augmentation support [ Spectrogram ] (#1319)
swetha097 May 9, 2024
33a04d3
Introduce API to obtain RPP layout
fiona-gladwin May 9, 2024
c396ead
Merge branch 'swbs_m2/audio/pr5' into swbs_m2/audio/pr5_layout
fiona-gladwin May 10, 2024
beba826
Add comments
fiona-gladwin May 10, 2024
5b75eed
Merge branch 'develop' of https://github.com/GPUOpen-ProfessionalComp…
fiona-gladwin May 10, 2024
9cf73d6
Merge branch 'swbs_m2/audio/pr5_layout' into swbs_m2/audio/pr5
fiona-gladwin May 17, 2024
b0f7289
Merge branch 'develop' of https://github.com/GPUOpen-ProfessionalComp…
fiona-gladwin May 20, 2024
2f33913
Use RPP_AUDIO flag to disable RPP audio calls
fiona-gladwin May 20, 2024
a525fce
Add Audio flag for PreEmphasis filter
fiona-gladwin May 20, 2024
89237f3
Merge branch 'develop' into swbs_m2/audio/pr5
SundarRajan28 May 22, 2024
13989e7
Merge branch 'develop' into swbs_m2/audio/pr5
SundarRajan28 May 24, 2024
7ac933f
Merge branch 'develop' into swbs_m2/audio/pr5
LakshmiKumar23 May 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Documentation for MIVisionX is available at

* Support for advanced GPUs
* Support for PreEmphasis Filter augmentation in openVX extensions
* Support for Spectrogram augmentation in openVX extensions

### Optimizations

Expand Down
1 change: 1 addition & 0 deletions amd_openvx_extensions/amd_rpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ list(APPEND SOURCES
source/tensor/Saturation.cpp
source/tensor/SequenceRearrange.cpp
source/tensor/Snow.cpp
source/tensor/Spectrogram.cpp
source/tensor/Vignette.cpp
source/tensor/WarpAffine.cpp
source/tensor/SequenceRearrange.cpp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ vx_status Snow_Register(vx_context);
vx_status Vignette_Register(vx_context);
vx_status WarpAffine_Register(vx_context);
vx_status SequenceRearrange_Register(vx_context);
vx_status Spectrogram_Register(vx_context);

// kernel names
#define VX_KERNEL_RPP_NOPBATCHPD_NAME "org.rpp.NopbatchPD"
Expand Down Expand Up @@ -274,12 +275,13 @@ vx_status SequenceRearrange_Register(vx_context);
#define VX_KERNEL_RPP_PIXELATE_NAME "org.rpp.Pixelate"
#define VX_KERNEL_RPP_VIGNETTE_NAME "org.rpp.Vignette"
#define VX_KERNEL_RPP_WARPAFFINE_NAME "org.rpp.WarpAffine"
#define VX_KERNEL_RPP_BRIGHTNESS_NAME "org.rpp.Brightness"
#define VX_KERNEL_RPP_COPY_NAME "org.rpp.Copy"
#define VX_KERNEL_RPP_CROPMIRRORNORMALIZE_NAME "org.rpp.CropMirrorNormalize"
#define VX_KERNEL_RPP_NOP_NAME "org.rpp.Nop"
#define VX_KERNEL_RPP_RESIZE_NAME "org.rpp.Resize"
#define VX_KERNEL_RPP_SEQUENCEREARRANGE_NAME "org.rpp.SequenceRearrange"
#define VX_KERNEL_RPP_PREEMPHASISFILTER_NAME "org.rpp.PreemphasisFilter"
#define VX_KERNEL_RPP_BRIGHTNESS_NAME "org.rpp.Brightness"
#define VX_KERNEL_RPP_COPY_NAME "org.rpp.Copy"
#define VX_KERNEL_RPP_CROPMIRRORNORMALIZE_NAME "org.rpp.CropMirrorNormalize"
#define VX_KERNEL_RPP_NOP_NAME "org.rpp.Nop"
#define VX_KERNEL_RPP_RESIZE_NAME "org.rpp.Resize"
#define VX_KERNEL_RPP_SEQUENCEREARRANGE_NAME "org.rpp.SequenceRearrange"
#define VX_KERNEL_RPP_PREEMPHASISFILTER_NAME "org.rpp.PreemphasisFilter"
#define VX_KERNEL_RPP_SPECTROGRAM_NAME "org.rpp.Spectrogram"

#endif //_AMDVX_EXT__PUBLISH_KERNELS_H_
8 changes: 6 additions & 2 deletions amd_openvx_extensions/amd_rpp/include/internal_rpp.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,16 +68,20 @@ enum vxTensorLayout {
VX_NHWC = 0,
VX_NCHW = 1,
VX_NFHWC = 2,
VX_NFCHW = 3
VX_NFCHW = 3,
VX_NHW = 4, // Audio/2D layout
VX_NFT = 5, // Frequency major, Used for Spectrogram/MelFilterBank
VX_NTF = 6 // Time major, Used for Spectrogram/MelFilterBank
};

//! \brief The utility functions
vx_node createNode(vx_graph graph, vx_enum kernelEnum, vx_reference params[], vx_uint32 num);
vx_status createRPPHandle(vx_node node, vxRppHandle ** pHandle, Rpp32u batchSize, Rpp32u deviceType);
vx_status releaseRPPHandle(vx_node node, vxRppHandle * handle, Rpp32u deviceType);
void fillDescriptionPtrfromDims(RpptDescPtr &descPtr, vxTensorLayout layout, size_t *tensorDims);
void fillAudioDescriptionPtrFromDims(RpptDescPtr &descPtr, size_t *tensorDims);
void fillAudioDescriptionPtrFromDims(RpptDescPtr &descPtr, size_t *tensorDims, vxTensorLayout layout = vxTensorLayout::VX_NHW);
RpptDataType getRpptDataType(vx_enum dataType);
RpptLayout getRpptLayout(vxTensorLayout layout);

class Kernellist
{
Expand Down
3 changes: 2 additions & 1 deletion amd_openvx_extensions/amd_rpp/include/kernels_rpp.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,8 @@ extern "C"
VX_KERNEL_RPP_SNOW = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x71,
VX_KERNEL_RPP_VIGNETTE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x72,
VX_KERNEL_RPP_WARPAFFINE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x73,
VX_KERNEL_RPP_PREEMPHASISFILTER = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x74
VX_KERNEL_RPP_PREEMPHASISFILTER = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x74,
VX_KERNEL_RPP_SPECTROGRAM = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x75
};

#ifdef __cplusplus
Expand Down
20 changes: 20 additions & 0 deletions amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h
Original file line number Diff line number Diff line change
Expand Up @@ -1876,6 +1876,26 @@ extern "C"
* \return A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a successful creation should be checked using <tt>\ref vxGetStatus</tt>.
*/
SHARED_PUBLIC vx_node VX_API_CALL vxExtRppPreemphasisFilter(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pPreemphCoeff, vx_scalar borderType);

/*! \brief [Graph] Produces a spectrogram from a 1D signal.
* \ingroup group_amd_rpp
* \param [in] graph The handle to the graph.
* \param [in] pSrc The input tensor in <tt>\ref VX_TYPE_FLOAT32</tt> format data.
* \param [in] pSrcRoi The input tensor of batch size in <tt>unsigned int</tt> containing the roi values for the input in xywh/ltrb format.
* \param [out] pDst The output tensor in <tt>\ref VX_TYPE_FLOAT32</tt> format data.
* \param [in] pDstRoi The input tensor of batch size in <tt>unsigned int</tt> containing the roi values for the output tensor in xywh/ltrb format.
* \param [in] windowFunction The input array in <tt>\ref VX_TYPE_FLOAT32</tt> format containing the samples of the window function that will be multiplied to each extracted window when calculating the STFT.
* \param [in] centerWindow The input scalar in <tt>\ref VX_TYPE_BOOL</tt> format indicating whether extracted windows should be padded so that the window function is centered at multiples of windowStep.
* \param [in] reflectPadding The input scalar in <tt>\ref VX_TYPE_BOOL</tt> format indicating the padding policy when sampling outside the bounds of the signal.
* \param [in] spectrogramLayout The input scalar in <tt>\ref VX_TYPE_INT32</tt> format containing the output spectrogram layout.
* \param [in] power The input scalar in <tt>\ref VX_TYPE_INT32</tt> format containing the exponent of the magnitude of the spectrum.
* \param [in] nfft The input scalar in <tt>\ref VX_TYPE_INT32</tt> format containing the size of the FFT.
* \param [in] windowLength The input scalar in <tt>\ref VX_TYPE_INT32</tt> format containing the window size in number of samples.
* \param [in] windowStep The input scalar in <tt>\ref VX_TYPE_INT32</tt> format containing the step between the STFT windows in number of samples.
* \return A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a successful creation should be checked using <tt>\ref vxGetStatus</tt>.
*/
SHARED_PUBLIC vx_node VX_API_CALL vxExtRppSpectrogram(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_tensor pDstRoi, vx_array windowFunction, vx_scalar centerWindow, vx_scalar reflectPadding, vx_scalar spectrogramLayout, vx_scalar power, vx_scalar nfft, vx_scalar windowLength, vx_scalar windowStep);

#ifdef __cplusplus
}
#endif
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ vx_status get_kernels_to_publish()
STATUS_ERROR_CHECK(ADD_KERNEL(Snow_Register));
STATUS_ERROR_CHECK(ADD_KERNEL(Vignette_Register));
STATUS_ERROR_CHECK(ADD_KERNEL(WarpAffine_Register));
STATUS_ERROR_CHECK(ADD_KERNEL(Spectrogram_Register));

return status;
}
Expand Down
63 changes: 59 additions & 4 deletions amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2558,6 +2558,32 @@ VX_API_ENTRY vx_node VX_API_CALL vxExtRppPreemphasisFilter(vx_graph graph, vx_te
return node;
}

/*! \brief Creates a Spectrogram node in the given graph.
 *
 * Packs the caller-supplied references, followed by an internally created
 * scalar holding the graph affinity (device type), into the parameter list
 * of the VX_KERNEL_RPP_SPECTROGRAM kernel and creates the node.
 *
 * \param [in] graph             The graph that will own the node.
 * \param [in] pSrc              Source tensor.
 * \param [in] pSrcRoi           ROI tensor for the source.
 * \param [out] pDst             Destination tensor.
 * \param [in] pDstRoi           ROI tensor for the destination.
 * \param [in] windowFunction    Array with the window function samples.
 * \param [in] centerWindows     Scalar: center-window flag.
 * \param [in] reflectPadding    Scalar: reflect-padding flag.
 * \param [in] spectrogramLayout Scalar: output spectrogram layout.
 * \param [in] power             Scalar: exponent of the spectrum magnitude.
 * \param [in] nfft              Scalar: FFT size.
 * \param [in] windowLength      Scalar: window length in samples.
 * \param [in] windowStep        Scalar: step between windows in samples.
 * \return The created node, or NULL when the graph's context is invalid, the
 *         device-type scalar cannot be created, or createNode() returns NULL.
 */
VX_API_ENTRY vx_node VX_API_CALL vxExtRppSpectrogram(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_tensor pDstRoi, vx_array windowFunction, vx_scalar centerWindows, vx_scalar reflectPadding, vx_scalar spectrogramLayout,
                                                     vx_scalar power, vx_scalar nfft, vx_scalar windowLength, vx_scalar windowStep) {
    vx_node node = NULL;
    vx_context context = vxGetContext((vx_reference)graph);
    if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
        vx_uint32 devtype = getGraphAffinity(graph);
        // Reuse the context fetched above instead of querying it a second time.
        vx_scalar deviceType = vxCreateScalar(context, VX_TYPE_UINT32, &devtype);
        if (vxGetStatus((vx_reference)deviceType) == VX_SUCCESS) {
            vx_reference params[] = {
                (vx_reference)pSrc,
                (vx_reference)pSrcRoi,
                (vx_reference)pDst,
                (vx_reference)pDstRoi,
                (vx_reference)windowFunction,
                (vx_reference)centerWindows,
                (vx_reference)reflectPadding,
                (vx_reference)spectrogramLayout,
                (vx_reference)power,
                (vx_reference)nfft,
                (vx_reference)windowLength,
                (vx_reference)windowStep,
                (vx_reference)deviceType};
            // Derive the count from the array so it cannot drift from the list above.
            node = createNode(graph, VX_KERNEL_RPP_SPECTROGRAM, params, (vx_uint32)(sizeof(params) / sizeof(params[0])));
            // Drop the local reference to the scalar created here; a parameter
            // bound to the node is kept alive by the node itself, so without
            // this release the scalar's reference leaks on every call.
            vxReleaseScalar(&deviceType);
        }
    }
    return node;
}

RpptDataType getRpptDataType(vx_enum vxDataType) {
switch(vxDataType) {
case vx_type_e::VX_TYPE_FLOAT32:
Expand All @@ -2571,6 +2597,34 @@ RpptDataType getRpptDataType(vx_enum vxDataType) {
}
}

/*! \brief Translates a vxTensorLayout value into the matching RpptLayout.
 *
 * VX_NFHWC and VX_NFCHW resolve to the same RPP layouts as VX_NHWC and
 * VX_NCHW respectively. The audio layouts (VX_NHW, VX_NFT, VX_NTF) are only
 * translated when the build defines RPP_AUDIO to a non-zero value.
 *
 * \param [in] layout The tensor layout to translate.
 * \return The corresponding RpptLayout.
 * \throws std::runtime_error for an audio layout when RPP_AUDIO is disabled,
 *         or for any value that is not a known layout.
 */
RpptLayout getRpptLayout(vxTensorLayout layout) {
    switch (layout) {
        case vxTensorLayout::VX_NHWC:
        case vxTensorLayout::VX_NFHWC:
            return RpptLayout::NHWC;

        case vxTensorLayout::VX_NCHW:
        case vxTensorLayout::VX_NFCHW:
            return RpptLayout::NCHW;

#if RPP_AUDIO
        case vxTensorLayout::VX_NHW:
            return RpptLayout::NHW;

        case vxTensorLayout::VX_NFT:
            return RpptLayout::NFT;

        case vxTensorLayout::VX_NTF:
            return RpptLayout::NTF;
#else
        // Audio layouts are recognised but cannot be mapped without RPP audio support.
        case vxTensorLayout::VX_NHW:
        case vxTensorLayout::VX_NFT:
        case vxTensorLayout::VX_NTF:
            throw std::runtime_error("RPP_AUDIO flag disabled, Audio layouts are not supported");
#endif

        default:
            break;
    }
    // Reached only for values outside the known layouts.
    throw std::runtime_error("Invalid layout");
}

void fillDescriptionPtrfromDims(RpptDescPtr &descPtr, vxTensorLayout layout, size_t *tensorDims) {
switch(layout) {
case vxTensorLayout::VX_NHWC: {
Expand Down Expand Up @@ -2627,16 +2681,17 @@ void fillDescriptionPtrfromDims(RpptDescPtr &descPtr, vxTensorLayout layout, siz
}
}

void fillAudioDescriptionPtrFromDims(RpptDescPtr &descPtr, size_t *tensorDims) {
descPtr->n = tensorDims[0];
descPtr->h = tensorDims[2];
descPtr->w = tensorDims[1];
/*! \brief Populates an RPP descriptor for an audio tensor from its dimensions.
 *
 * Reads maxTensorDims[0], [1] and [2] as n, h and w, fixes the channel count
 * to 1, derives the strides from those values, sets numDims to 4 and finally
 * stores the RPP layout obtained from getRpptLayout(layout).
 *
 * \param [in,out] descPtr       Descriptor to fill; must point to a valid descriptor.
 * \param [in]     maxTensorDims Array with at least three entries (n, h, w).
 * \param [in]     layout        Tensor layout converted through getRpptLayout().
 */
void fillAudioDescriptionPtrFromDims(RpptDescPtr &descPtr, size_t *maxTensorDims, vxTensorLayout layout) {
    // Audio data is described with a single channel.
    descPtr->c = 1;
    descPtr->n = maxTensorDims[0];
    descPtr->h = maxTensorDims[1];
    descPtr->w = maxTensorDims[2];
    descPtr->numDims = 4;

    // Strides, innermost first; each outer stride builds on the one inside it.
    auto &strides = descPtr->strides;
    strides.cStride = 1;
    strides.wStride = descPtr->c;
    strides.hStride = descPtr->c * descPtr->w;
    strides.nStride = strides.hStride * descPtr->h;

    // Resolved last: getRpptLayout() may throw, and the fields above are
    // already populated by then, matching the original ordering.
    descPtr->layout = getRpptLayout(layout);
}

// utility functions
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,12 @@ static vx_status VX_CALLBACK processPreemphasisFilter(vx_node node, const vx_ref
#endif
}
if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
#if RPP_AUDIO
rpp_status = rppt_pre_emphasis_filter_host((float *)data->pSrc, data->pSrcDesc, (float *)data->pDst, data->pDstDesc, (Rpp32s *)data->pSampleSize, data->pPreemphCoeff, RpptAudioBorderType(data->borderType), data->handle->rppHandle);
return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
#else
return_status = VX_ERROR_NOT_SUPPORTED;
#endif
}
return return_status;
}
Expand Down
Loading