diff --git a/include/rppdefs.h b/include/rppdefs.h
index 82539e139..d35f82bfd 100644
--- a/include/rppdefs.h
+++ b/include/rppdefs.h
@@ -134,7 +134,7 @@ typedef enum
     /*! \brief Out of bound source ROI \ingroup group_rppdefs */
     RPP_ERROR_OUT_OF_BOUND_SRC_ROI = -17,
     /*! \brief src and dst layout mismatch \ingroup group_rppdefs */
-    RPP_ERROR_SRC_DST_LAYOUT_MISMATCH = -18,
+    RPP_ERROR_LAYOUT_MISMATCH = -18,
     /*! \brief Number of channels is invalid. (Needs to adhere to function specification.) \ingroup group_rppdefs */
     RPP_ERROR_INVALID_CHANNELS = -19
 } RppStatus;
diff --git a/include/rppt_tensor_audio_augmentations.h b/include/rppt_tensor_audio_augmentations.h
index 09e4cbd56..4e5f412db 100644
--- a/include/rppt_tensor_audio_augmentations.h
+++ b/include/rppt_tensor_audio_augmentations.h
@@ -60,7 +60,7 @@ extern "C" {
  * \retval RPP_SUCCESS Successful completion.
  * \retval RPP_ERROR* Unsuccessful completion.
  */
-RppStatus rppt_non_silent_region_detection_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, Rpp32s *srcLengthTensor, Rpp32f *detectedIndexTensor, Rpp32f *detectionLengthTensor, Rpp32f cutOffDB, Rpp32s windowLength, Rpp32f referencePower, Rpp32s resetInterval, rppHandle_t rppHandle);
+RppStatus rppt_non_silent_region_detection_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, Rpp32s *srcLengthTensor, Rpp32s *detectedIndexTensor, Rpp32s *detectionLengthTensor, Rpp32f cutOffDB, Rpp32s windowLength, Rpp32f referencePower, Rpp32s resetInterval, rppHandle_t rppHandle);
 
 /*! \brief To Decibels augmentation on HOST backend
  * \details To Decibels augmentation for 1D audio buffer converts magnitude values to decibel values
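With this change, non-silent region detection reports its outputs as integer sample positions (Rpp32s) rather than floats. A minimal host-side sketch of the updated call, assuming srcPtr, srcDescPtr, and the RPP handle are already set up elsewhere; the buffer names and numeric values below are illustrative, not part of this patch:

// Hedged usage sketch for the revised NSR detection signature (values illustrative).
Rpp32s srcLength[1] = {44100};                 // valid samples per batch entry
Rpp32s detectedIndex[1], detectionLength[1];   // outputs are now Rpp32s, matching the new API
RppStatus status = rppt_non_silent_region_detection_host(srcPtr, srcDescPtr, srcLength,
                                                         detectedIndex, detectionLength,
                                                         -60.0f, 2048, 0.0f, 8192, handle);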
diff --git a/include/rppt_tensor_geometric_augmentations.h b/include/rppt_tensor_geometric_augmentations.h
index 695c3252d..8e846a41b 100644
--- a/include/rppt_tensor_geometric_augmentations.h
+++ b/include/rppt_tensor_geometric_augmentations.h
@@ -448,38 +448,42 @@ RppStatus rppt_phase_gpu(RppPtr_t srcPtr1, RppPtr_t srcPtr2, RpptDescPtr srcDesc
 * \details This function performs slice augmentation on a generic 4D tensor.
 * Slice augmentation involves selecting a region of interest (ROI) from the source tensor
 * and copying it to the destination tensor. Support added for f32 -> f32 and u8 -> u8 datatypes.
- * \param[in] srcPtr source tensor in HOST memory
- * \param[in] srcGenericDescPtr source tensor descriptor
- * \param[out] dstPtr destination tensor in HOST memory
- * \param[in] dstGenericDescPtr destination tensor descriptor
- * \param[in] roiGenericPtrSrc ROI data for each image in source tensor (tensor of batchSize RpptRoiGeneric values)
- * \param[in] roiType ROI type used (RpptRoi3DType::XYZWHD or RpptRoi3DType::LTFRBB)
+ * \param [in] srcPtr source tensor memory in HOST memory
+ * \param [in] srcGenericDescPtr source tensor descriptor
+ * \param [out] dstPtr destination tensor memory in HOST memory
+ * \param [in] dstGenericDescPtr destination tensor descriptor
+ * \param [in] anchorTensor starting index of the slice for each dimension in input (1D tensor of size = batchSize * numberOfDimensions)
+ * \param [in] shapeTensor length of the slice for each dimension in input (1D tensor of size = batchSize * numberOfDimensions)
+ * \param [in] fillValue fill value that is used to fill output if enablePadding is set to true
+ * \param [in] enablePadding boolean flag to specify if padding is enabled or not
+ * \param [in] roiTensor ROI data in HOST memory (1D tensor of size = batchSize * numberOfDimensions * 2)
 * \param [in] rppHandle RPP HOST handle created with \ref rppCreateWithBatchSize()
 * \return A \ref RppStatus enumeration.
 * \retval RPP_SUCCESS Successful completion.
 * \retval RPP_ERROR* Unsuccessful completion.
- * \ingroup group_tensor_geometric
 */
-RppStatus rppt_slice_host(RppPtr_t srcPtr, RpptGenericDescPtr srcGenericDescPtr, RppPtr_t dstPtr, RpptGenericDescPtr dstGenericDescPtr, RpptROI3DPtr roiGenericPtrSrc, RpptRoi3DType roiType, rppHandle_t rppHandle);
+RppStatus rppt_slice_host(RppPtr_t srcPtr, RpptGenericDescPtr srcGenericDescPtr, RppPtr_t dstPtr, RpptGenericDescPtr dstGenericDescPtr, Rpp32s *anchorTensor, Rpp32s *shapeTensor, RppPtr_t fillValue, bool enablePadding, Rpp32u *roiTensor, rppHandle_t rppHandle);
 
 #ifdef GPU_SUPPORT
 /*! \brief Slice augmentation GPU
 * \details This function performs slice augmentation on a generic 4D tensor.
 * Slice augmentation involves selecting a region of interest (ROI) from the source tensor
 * and copying it to the destination tensor. Support added for f32 -> f32 and u8 -> u8 datatypes.
- * \param[in] srcPtr source tensor in HIP memory
- * \param[in] srcGenericDescPtr source tensor descriptor
- * \param[out] dstPtr destination tensor in HIP memory
- * \param[in] dstGenericDescPtr destination tensor descriptor
- * \param[in] roiGenericPtrSrc ROI data for each image in source tensor (tensor of batchSize RpptRoiGeneric values)
- * \param[in] roiType ROI type used (RpptRoi3DType::XYZWHD or RpptRoi3DType::LTFRBB)
+ * \param [in] srcPtr source tensor memory in HIP memory
+ * \param [in] srcGenericDescPtr source tensor descriptor
+ * \param [out] dstPtr destination tensor memory in HIP memory
+ * \param [in] dstGenericDescPtr destination tensor descriptor
+ * \param [in] anchorTensor starting index of the slice for each dimension in input (1D tensor in pinned/HOST memory of size = batchSize * numberOfDimensions)
+ * \param [in] shapeTensor length of the slice for each dimension in input (1D tensor in pinned/HOST memory of size = batchSize * numberOfDimensions)
+ * \param [in] fillValue fill value that is used to fill output if enablePadding is set to true
+ * \param [in] enablePadding boolean flag to specify if padding is enabled or not
+ * \param [in] roiTensor ROI data in pinned/HOST memory (1D tensor of size = batchSize * numberOfDimensions * 2)
 * \param [in] rppHandle RPP HIP handle created with \ref rppCreateWithStreamAndBatchSize()
 * \return A \ref RppStatus enumeration.
 * \retval RPP_SUCCESS Successful completion.
 * \retval RPP_ERROR* Unsuccessful completion.
- * \ingroup group_tensor_geometric
 */
-RppStatus rppt_slice_gpu(RppPtr_t srcPtr, RpptGenericDescPtr srcGenericDescPtr, RppPtr_t dstPtr, RpptGenericDescPtr dstGenericDescPtr, RpptROI3DPtr roiGenericPtrSrc, RpptRoi3DType roiType, rppHandle_t rppHandle);
+RppStatus rppt_slice_gpu(RppPtr_t srcPtr, RpptGenericDescPtr srcGenericDescPtr, RppPtr_t dstPtr, RpptGenericDescPtr dstGenericDescPtr, Rpp32s *anchorTensor, Rpp32s *shapeTensor, RppPtr_t fillValue, bool enablePadding, Rpp32u *roiTensor, rppHandle_t rppHandle);
 #endif // GPU_SUPPORT
 
 /*! \brief Crop and Patch augmentation on HOST backend for a NCHW/NHWC layout tensor
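The declarations above replace the ROI-struct interface with flat per-dimension tensors. Per image, anchorTensor and shapeTensor hold numDims entries each, and roiTensor holds numDims begin values followed by numDims lengths (this layout is what the kernels later in this patch decode). A hedged packing sketch for a 3-dimensional (H, W, C) input; every numeric value is illustrative:

// Hedged packing sketch for batchSize = 2, numDims = 3 (H, W, C); values illustrative.
Rpp32s anchor[] = {10, 20, 0,   0, 0, 0};   // slice start per dim, per image
Rpp32s shape[]  = {64, 64, 3,  32, 48, 3};  // slice extent per dim, per image
Rpp32u roi[]    = {0, 0, 0, 224, 224, 3,    // image 0: begin x numDims, length x numDims
                   0, 0, 0, 224, 224, 3};   // image 1: begin x numDims, length x numDims
Rpp8u fill = 0;
rppt_slice_host(srcPtr, srcGenericDescPtr, dstPtr, dstGenericDescPtr,
                anchor, shape, &fill, true /*enablePadding*/, roi, handle);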
@@ -539,15 +543,15 @@ RppStatus rppt_crop_and_patch_gpu(RppPtr_t srcPtr1, RppPtr_t srcPtr2, RpptDescPt
 Support added for f32 -> f32 and u8 -> u8 datatypes.
 * \image html input150x150x4.gif Sample Input
 * \image html geometric_augmentations_flip_150x150x4.gif Sample Output
- * \param[in] srcPtr source tensor in HOST memory
- * \param[in] srcGenericDescPtr source tensor descriptor (Restrictions - numDims = 5, offsetInBytes >= 0, dataType = U8/F32, layout = NCDHW/NDHWC, c = 1/3)
- * \param[out] dstPtr destination tensor in HOST memory
- * \param[in] dstGenericDescPtr destination tensor descriptor (Restrictions - numDims = 5, offsetInBytes >= 0, dataType = U8/F32, layout = NCDHW/NDHWC, c = 1/3)
+ * \param [in] srcPtr source tensor in HOST memory
+ * \param [in] srcGenericDescPtr source tensor descriptor (Restrictions - numDims = 5, offsetInBytes >= 0, dataType = U8/F32, layout = NCDHW/NDHWC, c = 1/3)
+ * \param [out] dstPtr destination tensor in HOST memory
+ * \param [in] dstGenericDescPtr destination tensor descriptor (Restrictions - numDims = 5, offsetInBytes >= 0, dataType = U8/F32, layout = NCDHW/NDHWC, c = 1/3)
 * \param [in] horizontalTensor horizontal flag values to set horizontal flip on/off (1D tensor in HOST memory, of size batchSize, with horizontalTensor[i] = 0/1)
 * \param [in] verticalTensor vertical flag values to set vertical flip on/off (1D tensor in HOST memory, of size batchSize, with verticalTensor[i] = 0/1)
 * \param [in] depthTensor depth flag values to set depth flip on/off (1D tensor in HOST memory, of size batchSize, with depthTensor[i] = 0/1)
- * \param[in] roiGenericPtrSrc ROI data for each image in source tensor (tensor of batchSize RpptRoiGeneric values)
- * \param[in] roiType ROI type used (RpptRoi3DType::XYZWHD or RpptRoi3DType::LTFRBB)
+ * \param [in] roiGenericPtrSrc ROI data for each image in source tensor (tensor of batchSize RpptRoiGeneric values)
+ * \param [in] roiType ROI type used (RpptRoi3DType::XYZWHD or RpptRoi3DType::LTFRBB)
 * \param [in] rppHandle RPP HOST handle created with \ref rppCreateWithBatchSize()
 * \return A \ref RppStatus enumeration.
 * \retval RPP_SUCCESS Successful completion.
@@ -562,15 +566,15 @@ RppStatus rppt_flip_voxel_host(RppPtr_t srcPtr, RpptGenericDescPtr srcGenericDes
 Support added for f32 -> f32 and u8 -> u8 datatypes.
 * \image html input150x150x4.gif Sample Input
 * \image html geometric_augmentations_flip_150x150x4.gif Sample Output
- * \param[in] srcPtr source tensor in HIP memory
- * \param[in] srcGenericDescPtr source tensor descriptor (Restrictions - numDims = 5, offsetInBytes >= 0, dataType = U8/F32, layout = NCDHW/NDHWC, c = 1/3)
- * \param[out] dstPtr destination tensor in HIP memory
- * \param[in] dstGenericDescPtr destination tensor descriptor (Restrictions - numDims = 5, offsetInBytes >= 0, dataType = U8/F32, layout = NCDHW/NDHWC, c = 1/3)
+ * \param [in] srcPtr source tensor in HIP memory
+ * \param [in] srcGenericDescPtr source tensor descriptor (Restrictions - numDims = 5, offsetInBytes >= 0, dataType = U8/F32, layout = NCDHW/NDHWC, c = 1/3)
+ * \param [out] dstPtr destination tensor in HIP memory
+ * \param [in] dstGenericDescPtr destination tensor descriptor (Restrictions - numDims = 5, offsetInBytes >= 0, dataType = U8/F32, layout = NCDHW/NDHWC, c = 1/3)
 * \param [in] horizontalTensor horizontal flag values to set horizontal flip on/off (1D tensor in pinned/HOST memory, of size batchSize, with horizontalTensor[i] = 0/1)
 * \param [in] verticalTensor vertical flag values to set vertical flip on/off (1D tensor in pinned/HOST memory, of size batchSize, with verticalTensor[i] = 0/1)
 * \param [in] depthTensor depth flag values to set depth flip on/off (1D tensor in pinned/HOST memory, of size batchSize, with depthTensor[i] = 0/1)
- * \param[in] roiGenericPtrSrc ROI data for each image in source tensor (tensor of batchSize RpptRoiGeneric values)
- * \param[in] roiType ROI type used (RpptRoi3DType::XYZWHD or RpptRoi3DType::LTFRBB)
+ * \param [in] roiGenericPtrSrc ROI data for each image in source tensor (tensor of batchSize RpptRoiGeneric values)
+ * \param [in] roiType ROI type used (RpptRoi3DType::XYZWHD or RpptRoi3DType::LTFRBB)
 * \param [in] rppHandle RPP HIP handle created with \ref rppCreateWithStreamAndBatchSize()
 * \return A \ref RppStatus enumeration.
 * \retval RPP_SUCCESS Successful completion.
diff --git a/src/modules/cpu/kernel/non_silent_region_detection.hpp b/src/modules/cpu/kernel/non_silent_region_detection.hpp
index 74dffb18e..39d9e6940 100644
--- a/src/modules/cpu/kernel/non_silent_region_detection.hpp
+++ b/src/modules/cpu/kernel/non_silent_region_detection.hpp
@@ -95,8 +95,8 @@ Rpp32f getSquare(Rpp32f &value)
 RppStatus non_silent_region_detection_host_tensor(Rpp32f *srcPtr,
                                                   RpptDescPtr srcDescPtr,
                                                   Rpp32s *srcLengthTensor,
-                                                  Rpp32f *detectedIndexTensor,
-                                                  Rpp32f *detectionLengthTensor,
+                                                  Rpp32s *detectedIndexTensor,
+                                                  Rpp32s *detectionLengthTensor,
                                                   Rpp32f cutOffDB,
                                                   Rpp32s windowLength,
                                                   Rpp32f referencePower,
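The slice kernels that follow all derive their copy extents the same way, and the rule is small enough to state standalone: copy at most `shape` elements per dimension, clamped to what remains past `anchor`, and pad only when the request overruns the input. A self-contained sketch (function names here are illustrative, not from the patch):

#include <algorithm>

// Copy extent per dimension: at most `shape` elements, clamped to what remains past `anchor`.
int validExtent(int anchor, int shape, int length) { return std::min(shape, length - anchor); }

// Padding is needed only when the requested slice runs past the input's length.
bool needsPadding(int anchor, int shape, int length) { return (anchor + shape) > length; }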
#include "rpp_cpu_simd.hpp" #include "rpp_cpu_common.hpp" -RppStatus slice_f32_f32_host_tensor(Rpp32f *srcPtr, - RpptGenericDescPtr srcGenericDescPtr, - Rpp32f *dstPtr, - RpptGenericDescPtr dstGenericDescPtr, - RpptROI3DPtr roiGenericPtrSrc, - RpptRoi3DType roiType, - RppLayoutParams layoutParams, - rpp::Handle& handle) +template +RppStatus slice_host_tensor(T *srcPtr, + RpptGenericDescPtr srcGenericDescPtr, + T *dstPtr, + RpptGenericDescPtr dstGenericDescPtr, + Rpp32s *anchorTensor, + Rpp32s *shapeTensor, + T* fillValue, + bool enablePadding, + Rpp32u *roiTensor, + RppLayoutParams layoutParams, + rpp::Handle& handle) { - RpptROI3D roiDefault; - if(srcGenericDescPtr->layout==RpptLayout::NCDHW) - roiDefault = {0, 0, 0, (Rpp32s)srcGenericDescPtr->dims[4], (Rpp32s)srcGenericDescPtr->dims[3], (Rpp32s)srcGenericDescPtr->dims[2]}; - else if(srcGenericDescPtr->layout==RpptLayout::NDHWC) - roiDefault = {0, 0, 0, (Rpp32s)srcGenericDescPtr->dims[3], (Rpp32s)srcGenericDescPtr->dims[2], (Rpp32s)srcGenericDescPtr->dims[1]}; Rpp32u numThreads = handle.GetNumThreads(); + Rpp32u numDims = srcGenericDescPtr->numDims - 1; // exclude batchsize from input dims omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstGenericDescPtr->dims[0]; batchCount++) { - RpptROI3D roi; - RpptROI3DPtr roiPtrInput = &roiGenericPtrSrc[batchCount]; - compute_roi3D_validation_host(roiPtrInput, &roi, &roiDefault, roiType); + T *srcPtrTemp, *dstPtrTemp; + srcPtrTemp = srcPtr + batchCount * srcGenericDescPtr->strides[0]; + dstPtrTemp = dstPtr + batchCount * dstGenericDescPtr->strides[0]; - Rpp32f *srcPtrImage, *dstPtrImage; - srcPtrImage = srcPtr + batchCount * srcGenericDescPtr->strides[0]; - dstPtrImage = dstPtr + batchCount * dstGenericDescPtr->strides[0]; + T *srcPtrChannel, *dstPtrChannel; + dstPtrChannel = dstPtrTemp; - Rpp32u bufferLength = roi.xyzwhdROI.roiWidth * layoutParams.bufferMultiplier; + Rpp32s *anchor = &anchorTensor[batchCount * numDims]; + Rpp32s *shape = &shapeTensor[batchCount * numDims]; - Rpp32f *srcPtrChannel, *dstPtrChannel; - dstPtrChannel = dstPtrImage; + // get the starting address of length values from roiTensor + Rpp32u *roi = roiTensor + batchCount * numDims * 2; + Rpp32s *length = reinterpret_cast(&roi[numDims]); - // Slice without fused output-layout toggle (NCDHW -> NCDHW) - if((srcGenericDescPtr->layout == RpptLayout::NCDHW) && (dstGenericDescPtr->layout == RpptLayout::NCDHW)) + if (numDims == 4) { - srcPtrChannel = srcPtrImage + (roi.xyzwhdROI.xyz.z * srcGenericDescPtr->strides[2]) + (roi.xyzwhdROI.xyz.y * srcGenericDescPtr->strides[3]) + (roi.xyzwhdROI.xyz.x * layoutParams.bufferMultiplier); - - Rpp32u copyLengthInBytes = bufferLength * sizeof(Rpp32f); - for(int c = 0; c < layoutParams.channelParam; c++) + // order of dims + Rpp32s dimsOrder[3]; + if (dstGenericDescPtr->layout == RpptLayout::NCDHW) + { + dimsOrder[0] = 1; // depth + dimsOrder[1] = 2; // height + dimsOrder[2] = 3; // width + } + else { - Rpp32f *srcPtrDepth, *dstPtrDepth; - srcPtrDepth = srcPtrChannel; - dstPtrDepth = dstPtrChannel; + dimsOrder[0] = 0; // depth + dimsOrder[1] = 1; // height + dimsOrder[2] = 2; // width + } + Rpp32u maxDepth = std::min(shape[dimsOrder[0]], length[dimsOrder[0]] - anchor[dimsOrder[0]]); + Rpp32u maxHeight = std::min(shape[dimsOrder[1]], length[dimsOrder[1]] - anchor[dimsOrder[1]]); + Rpp32u maxWidth = std::min(shape[dimsOrder[2]], length[dimsOrder[2]] - anchor[dimsOrder[2]]); + Rpp32u bufferLength = maxWidth * layoutParams.bufferMultiplier; + 
Rpp32u copyLengthInBytes = bufferLength * sizeof(T); + + // if padding is required, fill the buffer with fill value specified + bool needPadding = (((anchor[dimsOrder[0]] + shape[dimsOrder[0]]) > length[dimsOrder[0]]) || + ((anchor[dimsOrder[1]] + shape[dimsOrder[1]]) > length[dimsOrder[1]]) || + ((anchor[dimsOrder[2]] + shape[dimsOrder[2]]) > length[dimsOrder[2]])); + if (needPadding && enablePadding) + std::fill(dstPtrChannel, dstPtrChannel + dstGenericDescPtr->strides[0] - 1, *fillValue); + + // slice without fused output-layout toggle (NCDHW -> NCDHW) + if (dstGenericDescPtr->layout == RpptLayout::NCDHW) + { + srcPtrChannel = srcPtrTemp + (anchor[1] * srcGenericDescPtr->strides[2]) + (anchor[2] * srcGenericDescPtr->strides[3]) + (anchor[3] * layoutParams.bufferMultiplier); + for(int c = 0; c < layoutParams.channelParam; c++) + { + T *srcPtrDepth, *dstPtrDepth; + srcPtrDepth = srcPtrChannel; + dstPtrDepth = dstPtrChannel; + for(int i = 0; i < maxDepth; i++) + { + T *srcPtrRow, *dstPtrRow; + srcPtrRow = srcPtrDepth; + dstPtrRow = dstPtrDepth; + for(int j = 0; j < maxHeight; j++) + { + memcpy(dstPtrRow, srcPtrRow, copyLengthInBytes); + srcPtrRow += srcGenericDescPtr->strides[3]; + dstPtrRow += dstGenericDescPtr->strides[3]; + } + srcPtrDepth += srcGenericDescPtr->strides[2]; + dstPtrDepth += dstGenericDescPtr->strides[2]; + } + srcPtrChannel += srcGenericDescPtr->strides[1]; + dstPtrChannel += srcGenericDescPtr->strides[1]; + } + } - for(int i = 0; i < roi.xyzwhdROI.roiDepth; i++) + // slice without fused output-layout toggle (NDHWC -> NDHWC) + else if (dstGenericDescPtr->layout == RpptLayout::NDHWC) + { + srcPtrChannel = srcPtrTemp + (anchor[0] * srcGenericDescPtr->strides[1]) + (anchor[1] * srcGenericDescPtr->strides[2]) + (anchor[2] * layoutParams.bufferMultiplier); + T *srcPtrDepth = srcPtrChannel; + T *dstPtrDepth = dstPtrChannel; + for(int i = 0; i < maxDepth; i++) { - Rpp32f *srcPtrRow, *dstPtrRow; + T *srcPtrRow, *dstPtrRow; srcPtrRow = srcPtrDepth; dstPtrRow = dstPtrDepth; - - for(int j = 0; j < roi.xyzwhdROI.roiHeight; j++) + for(int j = 0; j < maxHeight; j++) { memcpy(dstPtrRow, srcPtrRow, copyLengthInBytes); - - srcPtrRow += srcGenericDescPtr->strides[3]; - dstPtrRow += dstGenericDescPtr->strides[3]; + srcPtrRow += srcGenericDescPtr->strides[2]; + dstPtrRow += dstGenericDescPtr->strides[2]; } - srcPtrDepth += srcGenericDescPtr->strides[2]; - dstPtrDepth += dstGenericDescPtr->strides[2]; + srcPtrDepth += srcGenericDescPtr->strides[1]; + dstPtrDepth += dstGenericDescPtr->strides[1]; } - - srcPtrChannel += srcGenericDescPtr->strides[1]; - dstPtrChannel += srcGenericDescPtr->strides[1]; } } - // Slice without fused output-layout toggle (NDHWC -> NDHWC) - else if((srcGenericDescPtr->layout == RpptLayout::NDHWC) && (dstGenericDescPtr->layout == RpptLayout::NDHWC)) + else if (numDims == 3) { - Rpp32u copyLengthInBytes = bufferLength * sizeof(Rpp32f); - srcPtrChannel = srcPtrImage + (roi.xyzwhdROI.xyz.z * srcGenericDescPtr->strides[1]) + (roi.xyzwhdROI.xyz.y * srcGenericDescPtr->strides[2]) + (roi.xyzwhdROI.xyz.x * layoutParams.bufferMultiplier); - Rpp32f *srcPtrDepth = srcPtrChannel; - Rpp32f *dstPtrDepth = dstPtrChannel; - - for(int i = 0; i < roi.xyzwhdROI.roiDepth; i++) + // order of dims + Rpp32s dimsOrder[2]; + if (dstGenericDescPtr->layout == RpptLayout::NCHW) { - Rpp32f *srcPtrRow, *dstPtrRow; - srcPtrRow = srcPtrDepth; - dstPtrRow = dstPtrDepth; - - for(int j = 0; j < roi.xyzwhdROI.roiHeight; j++) - { - memcpy(dstPtrRow, srcPtrRow, copyLengthInBytes); - - srcPtrRow += 
srcGenericDescPtr->strides[2]; - dstPtrRow += dstGenericDescPtr->strides[2]; - } - srcPtrDepth += srcGenericDescPtr->strides[1]; - dstPtrDepth += dstGenericDescPtr->strides[1]; + dimsOrder[0] = 1; // height + dimsOrder[1] = 2; // width + } + else + { + dimsOrder[0] = 0; // height + dimsOrder[1] = 1; // width } - } - } - - return RPP_SUCCESS; -} - -RppStatus slice_u8_u8_host_tensor(Rpp8u *srcPtr, - RpptGenericDescPtr srcGenericDescPtr, - Rpp8u *dstPtr, - RpptGenericDescPtr dstGenericDescPtr, - RpptROI3DPtr roiGenericPtrSrc, - RpptRoi3DType roiType, - RppLayoutParams layoutParams, - rpp::Handle& handle) -{ - RpptROI3D roiDefault; - if(srcGenericDescPtr->layout==RpptLayout::NCDHW) - roiDefault = {0, 0, 0, (Rpp32s)srcGenericDescPtr->dims[4], (Rpp32s)srcGenericDescPtr->dims[3], (Rpp32s)srcGenericDescPtr->dims[2]}; - else if(srcGenericDescPtr->layout==RpptLayout::NDHWC) - roiDefault = {0, 0, 0, (Rpp32s)srcGenericDescPtr->dims[3], (Rpp32s)srcGenericDescPtr->dims[2], (Rpp32s)srcGenericDescPtr->dims[1]}; - Rpp32u numThreads = handle.GetNumThreads(); - - omp_set_dynamic(0); -#pragma omp parallel for num_threads(numThreads) - for(int batchCount = 0; batchCount < dstGenericDescPtr->dims[0]; batchCount++) - { - RpptROI3D roi; - RpptROI3DPtr roiPtrInput = &roiGenericPtrSrc[batchCount]; - compute_roi3D_validation_host(roiPtrInput, &roi, &roiDefault, roiType); - Rpp8u *srcPtrImage, *dstPtrImage; - srcPtrImage = srcPtr + batchCount * srcGenericDescPtr->strides[0]; - dstPtrImage = dstPtr + batchCount * dstGenericDescPtr->strides[0]; + Rpp32u maxHeight = std::min(shape[dimsOrder[0]], length[dimsOrder[0]] - anchor[dimsOrder[0]]); + Rpp32u maxWidth = std::min(shape[dimsOrder[1]], length[dimsOrder[1]] - anchor[dimsOrder[1]]); + Rpp32u bufferLength = maxWidth * layoutParams.bufferMultiplier; + Rpp32u copyLengthInBytes = bufferLength * sizeof(T); - Rpp32u bufferLength = roi.xyzwhdROI.roiWidth * layoutParams.bufferMultiplier; - Rpp8u *srcPtrChannel, *dstPtrChannel; - dstPtrChannel = dstPtrImage; + // if padding is required, fill the buffer with fill value specified + bool needPadding = ((anchor[dimsOrder[0]] + shape[dimsOrder[0]]) > length[dimsOrder[0]]) || + ((anchor[dimsOrder[1]] + shape[dimsOrder[1]]) > length[dimsOrder[1]]); + if (needPadding && enablePadding) + std::fill(dstPtrChannel, dstPtrChannel + dstGenericDescPtr->strides[0] - 1, *fillValue); - // Slice without fused output-layout toggle (NCDHW -> NCDHW) - if((srcGenericDescPtr->layout == RpptLayout::NCDHW) && (dstGenericDescPtr->layout == RpptLayout::NCDHW)) - { - srcPtrChannel = srcPtrImage + (roi.xyzwhdROI.xyz.z * srcGenericDescPtr->strides[2]) + (roi.xyzwhdROI.xyz.y * srcGenericDescPtr->strides[3]) + (roi.xyzwhdROI.xyz.x * layoutParams.bufferMultiplier); - for(int c = 0; c < layoutParams.channelParam; c++) + // slice without fused output-layout toggle (NCHW -> NCHW) + if (dstGenericDescPtr->layout == RpptLayout::NCHW) { - Rpp8u *srcPtrDepth, *dstPtrDepth; - srcPtrDepth = srcPtrChannel; - dstPtrDepth = dstPtrChannel; - for(int i = 0; i < roi.xyzwhdROI.roiDepth; i++) + srcPtrChannel = srcPtrTemp + (anchor[1] * srcGenericDescPtr->strides[2]) + (anchor[2] * layoutParams.bufferMultiplier); + for(int c = 0; c < layoutParams.channelParam; c++) { - Rpp8u *srcPtrRow, *dstPtrRow; - srcPtrRow = srcPtrDepth; - dstPtrRow = dstPtrDepth; - for(int j = 0; j < roi.xyzwhdROI.roiHeight; j++) + T *srcPtrRow, *dstPtrRow; + srcPtrRow = srcPtrChannel; + dstPtrRow = dstPtrChannel; + for(int j = 0; j < maxHeight; j++) { - memcpy(dstPtrRow, srcPtrRow, bufferLength * 
sizeof(Rpp8u)); - srcPtrRow += srcGenericDescPtr->strides[3]; - dstPtrRow += dstGenericDescPtr->strides[3]; + memcpy(dstPtrRow, srcPtrRow, copyLengthInBytes); + srcPtrRow += srcGenericDescPtr->strides[2]; + dstPtrRow += dstGenericDescPtr->strides[2]; } - srcPtrDepth += srcGenericDescPtr->strides[2]; - dstPtrDepth += dstGenericDescPtr->strides[2]; + srcPtrChannel += srcGenericDescPtr->strides[1]; + dstPtrChannel += srcGenericDescPtr->strides[1]; } - srcPtrChannel += srcGenericDescPtr->strides[1]; - dstPtrChannel += srcGenericDescPtr->strides[1]; } - } - // Slice without fused output-layout toggle (NDHWC -> NDHWC) - else if((srcGenericDescPtr->layout == RpptLayout::NDHWC) && (dstGenericDescPtr->layout == RpptLayout::NDHWC)) - { - srcPtrChannel = srcPtrImage + (roi.xyzwhdROI.xyz.z * srcGenericDescPtr->strides[1]) + (roi.xyzwhdROI.xyz.y * srcGenericDescPtr->strides[2]) + (roi.xyzwhdROI.xyz.x * layoutParams.bufferMultiplier); - Rpp8u *srcPtrDepth = srcPtrChannel; - Rpp8u *dstPtrDepth = dstPtrChannel; - for(int i = 0; i < roi.xyzwhdROI.roiDepth; i++) + // slice without fused output-layout toggle (NHWC -> NHWC) + else if (dstGenericDescPtr->layout == RpptLayout::NHWC) { - Rpp8u *srcPtrRow, *dstPtrRow; - srcPtrRow = srcPtrDepth; - dstPtrRow = dstPtrDepth; - - for(int j = 0; j < roi.xyzwhdROI.roiHeight; j++) + srcPtrChannel = srcPtrTemp + (anchor[0] * srcGenericDescPtr->strides[1]) + (anchor[1] * layoutParams.bufferMultiplier); + T *srcPtrRow = srcPtrChannel; + T *dstPtrRow = dstPtrChannel; + for(int j = 0; j < maxHeight; j++) { - memcpy(dstPtrRow, srcPtrRow, bufferLength * sizeof(Rpp8u)); - srcPtrRow += srcGenericDescPtr->strides[2]; - dstPtrRow += dstGenericDescPtr->strides[2]; + memcpy(dstPtrRow, srcPtrRow, copyLengthInBytes); + srcPtrRow += srcGenericDescPtr->strides[1]; + dstPtrRow += dstGenericDescPtr->strides[1]; } - srcPtrDepth += srcGenericDescPtr->strides[1]; - dstPtrDepth += dstGenericDescPtr->strides[1]; } } + else if (numDims == 2) + { + srcPtrChannel = srcPtrTemp + (anchor[0] * srcGenericDescPtr->strides[1]) + anchor[1]; + Rpp32u maxHeight = std::min(shape[0], length[0] - anchor[0]); + Rpp32u maxWidth = std::min(shape[1], length[1] - anchor[1]); + Rpp32u copyLengthInBytes = maxWidth * sizeof(T); + + // if padding is required, fill the buffer with fill value specified + bool needPadding = ((anchor[0] + shape[0]) > length[0]) || + ((anchor[1] + shape[1]) > length[1]); + if (needPadding && enablePadding) + std::fill(dstPtrChannel, dstPtrChannel + dstGenericDescPtr->strides[0] - 1, *fillValue); + + T *srcPtrRow = srcPtrChannel; + T *dstPtrRow = dstPtrChannel; + for(int j = 0; j < maxHeight; j++) + { + memcpy(dstPtrRow, srcPtrRow, copyLengthInBytes); + srcPtrRow += srcGenericDescPtr->strides[1]; + dstPtrRow += dstGenericDescPtr->strides[1]; + } + } + else if (numDims == 1) + { + srcPtrChannel = srcPtrTemp + anchor[0]; + Rpp32u maxLength = std::min(shape[0], length[0] - anchor[0]); + Rpp32u copyLengthInBytes = maxLength * sizeof(T); + + // if padding is required, fill the buffer with fill value specified + bool needPadding = ((anchor[0] + shape[0]) > length[0]); + if (needPadding && enablePadding) + std::fill(dstPtrTemp, dstPtrTemp + dstGenericDescPtr->strides[0] - 1, *fillValue); + memcpy(dstPtrChannel, srcPtrChannel, copyLengthInBytes); + } } return RPP_SUCCESS; diff --git a/src/modules/hip/kernel/slice.hpp b/src/modules/hip/kernel/slice.hpp index 8deb52bbb..c1b7a6c41 100644 --- a/src/modules/hip/kernel/slice.hpp +++ b/src/modules/hip/kernel/slice.hpp @@ -2,24 +2,79 @@ #include #include 
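Every layout branch in the host kernel above reduces to the same strided row-copy loop; only the stride indices differ between layouts. A standalone sketch of that pattern, with hypothetical names (stride counts are in elements, the copy length in bytes, mirroring the code above):

#include <cstring>

// Copy `rows` rows of `rowBytes` bytes each, advancing src/dst by their own row strides.
template <typename T>
void copyRows(T *dst, const T *src, int rows, size_t rowBytes, size_t srcStride, size_t dstStride)
{
    for (int r = 0; r < rows; r++)
        memcpy(dst + r * dstStride, src + r * srcStride, rowBytes);
}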
"rpp_hip_common.hpp" +template +__global__ void fill_value_ncdhw_hip_tensor(T *dstPtr, + uint3 dstStridesCDH, + int channels, + uint3 dstDimsDHW, + T *fillValue) +{ + int id_x = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x) * 8; // W - inner most dim vectorized + int id_y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y; // H - second to inner + int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z; // D - outer most dim + + if ((id_z >= dstDimsDHW.x) || (id_y >= dstDimsDHW.y) || (id_x >= dstDimsDHW.z)) + { + return; + } + + uint dstIdx = (id_z * dstStridesCDH.y) + (id_y * dstStridesCDH.z) + id_x; + d_float8 val_f8; + val_f8.f4[0] = (float4)(*fillValue); + val_f8.f4[1] = val_f8.f4[0]; + for(int c = 0; c < channels; c++) + { + rpp_hip_pack_float8_and_store8(dstPtr + dstIdx, &val_f8); + dstIdx += dstStridesCDH.x; + } +} + + +template +__global__ void fill_value_ndhwc_hip_tensor(T *dstPtr, + uint2 dstStridesDH, + uint3 dstDimsDHW, + T *fillValue) +{ + int id_x = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x) * 8; // W - inner most dim vectorized + int id_y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y; // H - second to inner + int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z; // D - outer most dim + + if ((id_z >= dstDimsDHW.x) || (id_y >= dstDimsDHW.y) || (id_x >= dstDimsDHW.z)) + { + return; + } + + uint dstIdx = (id_z * dstStridesDH.x) + (id_y * dstStridesDH.y) + id_x * 3; + d_float24 val_f24; + val_f24.f4[0] = (float4)(*fillValue); + val_f24.f4[1] = val_f24.f4[0]; + val_f24.f4[2] = val_f24.f4[0]; + val_f24.f4[3] = val_f24.f4[0]; + val_f24.f4[4] = val_f24.f4[0]; + val_f24.f4[5] = val_f24.f4[0]; + rpp_hip_pack_float24_pkd3_and_store24_pkd3(dstPtr + dstIdx, &val_f24); +} + + template __global__ void slice_ncdhw_hip_tensor(T *srcPtr, uint3 srcStridesCDH, T *dstPtr, uint3 dstStridesCDH, int channels, - RpptROI3DPtr roiGenericSrc) + uint3 validShapeDHW) { int id_x = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x) * 8; // W - inner most dim vectorized int id_y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y; // H - second to inner int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z; // D - outer most dim - if ((id_z >= roiGenericSrc->xyzwhdROI.roiDepth) || (id_y >= roiGenericSrc->xyzwhdROI.roiHeight) || (id_x >= roiGenericSrc->xyzwhdROI.roiWidth)) + if ((id_z >= validShapeDHW.x) || (id_y >= validShapeDHW.y) || (id_x >= validShapeDHW.z)) { return; } - uint srcIdx = ((id_z + roiGenericSrc->xyzwhdROI.xyz.z) * srcStridesCDH.y) + ((id_y + roiGenericSrc->xyzwhdROI.xyz.y) * srcStridesCDH.z) + (id_x + roiGenericSrc->xyzwhdROI.xyz.x); + uint srcIdx = (id_z * srcStridesCDH.y) + (id_y * srcStridesCDH.z) + id_x; uint dstIdx = (id_z * dstStridesCDH.y) + (id_y * dstStridesCDH.z) + id_x; d_float8 val_f8; @@ -32,77 +87,439 @@ __global__ void slice_ncdhw_hip_tensor(T *srcPtr, } } + template __global__ void slice_ndhwc_hip_tensor(T *srcPtr, uint2 srcStridesDH, T *dstPtr, uint2 dstStridesDH, - RpptROI3DPtr roiGenericSrc) + uint3 validShapeDHW) { int id_x = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x) * 8; // WC - inner most dim vectorized int id_y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y; // H - second to inner int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z; // D - outer most dim - if ((id_z >= roiGenericSrc->xyzwhdROI.roiDepth) || (id_y >= roiGenericSrc->xyzwhdROI.roiHeight) || (id_x >= roiGenericSrc->xyzwhdROI.roiWidth)) + if ((id_z >= validShapeDHW.x) || (id_y >= validShapeDHW.y) || (id_x >= validShapeDHW.z)) { return; } - uint srcIdx = ((id_z + 
roiGenericSrc->xyzwhdROI.xyz.z) * srcStridesDH.x) + ((id_y + roiGenericSrc->xyzwhdROI.xyz.y) * srcStridesDH.y) + (id_x + roiGenericSrc->xyzwhdROI.xyz.x) * 3; - uint dstIdx = (id_z * dstStridesDH.x) + (id_y * dstStridesDH.y) + id_x * 3; + uint srcIdx = (id_z * srcStridesDH.x) + (id_y * srcStridesDH.y) + (id_x * 3); + uint dstIdx = (id_z * dstStridesDH.x) + (id_y * dstStridesDH.y) + (id_x * 3); d_float24 val_f24; rpp_hip_load24_pkd3_and_unpack_to_float24_pln3(srcPtr + srcIdx, &val_f24); rpp_hip_pack_float24_pln3_and_store24_pkd3(dstPtr + dstIdx, &val_f24); } +template +RppStatus hip_exec_fill_value_tensor(T *dstPtr, + RpptGenericDescPtr dstGenericDescPtr, + Rpp32s *anchorTensor, + Rpp32s *shapeTensor, + T *fillValue, + Rpp32u *roiTensor, + rpp::Handle& handle, + Rpp32u numDims) +{ + if (numDims == 4) + { + // set the dimsOrder and globalthreads values required for NDHWC layout + Rpp32s dimsOrder[3] = {0, 1, 2}; + int globalThreads_x = (dstGenericDescPtr->strides[2] / 3 + 7) >> 3; // W - width (x direction) - vectorized for 8 element loads/stores per channel + int globalThreads_y = dstGenericDescPtr->dims[2]; // H - height (y direction) + int globalThreads_z = dstGenericDescPtr->dims[1]; // D - depth (z direction) + + // change the dimsOrder and globalthreads values if layout is NCDHW + if (dstGenericDescPtr->layout == RpptLayout::NCDHW) + { + dimsOrder[0] = 1; // depth + dimsOrder[1] = 2; // height + dimsOrder[2] = 3; // width + globalThreads_x = (dstGenericDescPtr->strides[3] + 7) >> 3; // W - width (x direction) - vectorized for 8 element loads/stores per channel + globalThreads_y = dstGenericDescPtr->dims[3]; // H - height (y direction) + globalThreads_z = dstGenericDescPtr->dims[2]; // D - depth (z direction) + } + for(int batchCount = 0; batchCount < dstGenericDescPtr->dims[0]; batchCount++) + { + Rpp32s *anchor = &anchorTensor[batchCount * numDims]; + Rpp32s *shape = &shapeTensor[batchCount * numDims]; + Rpp32u *roi = roiTensor + batchCount * numDims * 2; + Rpp32s *length = reinterpret_cast(&roi[numDims]); + Rpp32u maxDepth = std::min(shape[dimsOrder[0]], length[dimsOrder[0]] - anchor[dimsOrder[0]]); + Rpp32u maxHeight = std::min(shape[dimsOrder[1]], length[dimsOrder[1]] - anchor[dimsOrder[1]]); + Rpp32u maxWidth = std::min(shape[dimsOrder[2]], length[dimsOrder[2]] - anchor[dimsOrder[2]]); + + // checking if padding is required + bool needPadding = (((anchor[dimsOrder[0]] + shape[dimsOrder[0]]) > length[dimsOrder[0]]) || + ((anchor[dimsOrder[1]] + shape[dimsOrder[1]]) > length[dimsOrder[1]]) || + ((anchor[dimsOrder[2]] + shape[dimsOrder[2]]) > length[dimsOrder[2]])); + + // if needPadding is set, launch kernel for filling the padded region with fill value specified + if (needPadding && dstGenericDescPtr->layout == RpptLayout::NCDHW) + { + hipLaunchKernelGGL(fill_value_ncdhw_hip_tensor, + dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + 0, + handle.GetStream(), + dstPtr + (batchCount * dstGenericDescPtr->strides[0]), + make_uint3(dstGenericDescPtr->strides[1], dstGenericDescPtr->strides[2], dstGenericDescPtr->strides[3]), + dstGenericDescPtr->dims[1], + make_uint3(maxDepth, maxHeight, maxWidth), + fillValue); + } + else if (needPadding && dstGenericDescPtr->layout == RpptLayout::NDHWC) + { + hipLaunchKernelGGL(fill_value_ndhwc_hip_tensor, + dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), 
ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + 0, + handle.GetStream(), + dstPtr + (batchCount * dstGenericDescPtr->strides[0]), + make_uint2(dstGenericDescPtr->strides[1], dstGenericDescPtr->strides[2]), + make_uint3(maxDepth, maxHeight, maxWidth), + fillValue); + } + } + } + else if (numDims == 3) + { + // set the dimsOrder and globalthreads values required for NHWC layout + Rpp32s dimsOrder[2] = {0, 1}; + int globalThreads_x = (dstGenericDescPtr->strides[1] / 3 + 7) >> 3; // W - width (x direction) - vectorized for 8 element loads/stores per channel + int globalThreads_y = dstGenericDescPtr->dims[1]; // H - height (y direction) + int globalThreads_z = 1; + + // change the dimsOrder and globalthreads values if layout is NCHW + if (dstGenericDescPtr->layout == RpptLayout::NCHW) + { + dimsOrder[0] = 1; // height + dimsOrder[1] = 2; // width + globalThreads_x = (dstGenericDescPtr->strides[2] + 7) >> 3; // W - width (x direction) - vectorized for 8 element loads/stores per channel + globalThreads_y = dstGenericDescPtr->dims[2]; // H - height (y direction) + globalThreads_z = 1; + } + + for(int batchCount = 0; batchCount < dstGenericDescPtr->dims[0]; batchCount++) + { + Rpp32s *anchor = &anchorTensor[batchCount * numDims]; + Rpp32s *shape = &shapeTensor[batchCount * numDims]; + Rpp32u *roi = roiTensor + batchCount * numDims * 2; + Rpp32s *length = reinterpret_cast(&roi[numDims]); + Rpp32u maxHeight = std::min(shape[dimsOrder[0]], length[dimsOrder[0]] - anchor[dimsOrder[0]]); + Rpp32u maxWidth = std::min(shape[dimsOrder[1]], length[dimsOrder[1]] - anchor[dimsOrder[1]]); + + // check if padding is needed + bool needPadding = (((anchor[dimsOrder[0]] + shape[dimsOrder[0]]) > length[dimsOrder[0]]) || + ((anchor[dimsOrder[1]] + shape[dimsOrder[1]]) > length[dimsOrder[1]])); + + // launch kernel for filling the padded region with fill value specified + if (needPadding && dstGenericDescPtr->layout == RpptLayout::NCHW) + { + hipLaunchKernelGGL(fill_value_ncdhw_hip_tensor, + dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), globalThreads_z), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, 1), + 0, + handle.GetStream(), + dstPtr + (batchCount * dstGenericDescPtr->strides[0]), + make_uint3(dstGenericDescPtr->strides[1], 0, dstGenericDescPtr->strides[2]), + dstGenericDescPtr->dims[1], + make_uint3(1, shape[1], shape[2]), + fillValue); + } + else if (needPadding && dstGenericDescPtr->layout == RpptLayout::NHWC) + { + hipLaunchKernelGGL(fill_value_ndhwc_hip_tensor, + dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), globalThreads_z), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, 1), + 0, + handle.GetStream(), + dstPtr + (batchCount * dstGenericDescPtr->strides[0]), + make_uint2(1, dstGenericDescPtr->strides[1]), + make_uint3(1, maxHeight, maxWidth), + fillValue); + } + } + } + else if (numDims == 2) + { + // NHW + int globalThreads_x = (dstGenericDescPtr->strides[1] + 7) >> 3; // W - width (x direction) - vectorized for 8 element loads/stores per channel + int globalThreads_y = dstGenericDescPtr->dims[1]; // H - height (y direction) + int globalThreads_z = 1; + + for(int batchCount = 0; batchCount < dstGenericDescPtr->dims[0]; batchCount++) + { + Rpp32s *anchor = &anchorTensor[batchCount * numDims]; + Rpp32s *shape = &shapeTensor[batchCount * numDims]; + Rpp32u *roi = roiTensor + batchCount * numDims * 2; + Rpp32s *length 
= reinterpret_cast(&roi[numDims]); + Rpp32u maxHeight = std::min(shape[0], length[0] - anchor[0]); + Rpp32u maxWidth = std::min(shape[1], length[1] - anchor[1]); + + // check if padding is needed + bool needPadding = (((anchor[0] + shape[0]) > length[0]) || + ((anchor[1] + shape[1]) > length[1])); + + // launch kernel for filling the padded region with fill value specified + if (needPadding) + { + hipLaunchKernelGGL(fill_value_ncdhw_hip_tensor, + dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, 1), + 0, + handle.GetStream(), + dstPtr + (batchCount * dstGenericDescPtr->strides[0]), + make_uint3(0, 0, dstGenericDescPtr->strides[1]), + 1, + make_uint3(1, shape[0], shape[1]), + fillValue); + } + } + } + else if (numDims == 1) + { + int globalThreads_x = (dstGenericDescPtr->strides[0] + 7) >> 3; // W - width (x direction) - vectorized for 8 element loads/stores per channel + int globalThreads_y = 1; + int globalThreads_z = 1; + + for(int batchCount = 0; batchCount < dstGenericDescPtr->dims[0]; batchCount++) + { + Rpp32s *anchor = &anchorTensor[batchCount * numDims]; + Rpp32s *shape = &shapeTensor[batchCount * numDims]; + Rpp32u *roi = roiTensor + batchCount * numDims * 2; + Rpp32s *length = reinterpret_cast(&roi[numDims]); + Rpp32u maxLength = std::min(shape[0], length[0] - anchor[0]); + + // check if padding is needed + bool needPadding = ((anchor[0] + shape[0]) > length[0]); + + // launch kernel for filling the padded region with fill value specified + if (needPadding) + { + hipLaunchKernelGGL(fill_value_ncdhw_hip_tensor, + dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, 1, 1), + 0, + handle.GetStream(), + dstPtr + (batchCount * dstGenericDescPtr->strides[0]), + make_uint3(0, 0, 1), + 1, + make_uint3(1, 1, shape[0]), + fillValue); + } + } + } + + return RPP_SUCCESS; +} + template RppStatus hip_exec_slice_tensor(T *srcPtr, RpptGenericDescPtr srcGenericDescPtr, T *dstPtr, RpptGenericDescPtr dstGenericDescPtr, - RpptROI3DPtr roiGenericPtrSrc, + Rpp32s *anchorTensor, + Rpp32s *shapeTensor, + T *fillValue, + bool enablePadding, + Rpp32u *roiTensor, rpp::Handle& handle) { - if (dstGenericDescPtr->layout == RpptLayout::NCDHW) + Rpp32u numDims = srcGenericDescPtr->numDims - 1; // exclude batchsize from input dims + + /* if enabledPadding is set to true, launch kernel to fill the output buffers with fill value specified. 
+ This will be only done if shapeTensor[d] > roiTensor[d] where d is the dimension*/ + if (enablePadding) + { + hip_exec_fill_value_tensor(dstPtr, + dstGenericDescPtr, + anchorTensor, + shapeTensor, + fillValue, + roiTensor, + handle, + numDims); + hipStreamSynchronize(handle.GetStream()); + } + + if(numDims == 4) { - int globalThreads_x = (dstGenericDescPtr->strides[3] + 7) >> 3; // W - width (x direction) - vectorized for 8 element loads/stores per channel - int globalThreads_y = dstGenericDescPtr->dims[3]; // H - height (y direction) - int globalThreads_z = dstGenericDescPtr->dims[2]; // D - depth (z direction) + // set the dimsOrder and globalthreads values required for NDHWC layout + Rpp32s dimsOrder[3] = {0, 1, 2}; + int globalThreads_x = (dstGenericDescPtr->strides[2] / 3 + 7) >> 3; // W - width (x direction) - vectorized for 8 element loads/stores per channel + int globalThreads_y = dstGenericDescPtr->dims[2]; // H - height (y direction) + int globalThreads_z = dstGenericDescPtr->dims[1]; // D - depth (z direction) + + // change the dimsOrder and globalthreads values if layout is NCDHW + if (dstGenericDescPtr->layout == RpptLayout::NCDHW) + { + dimsOrder[0] = 1; // depth + dimsOrder[1] = 2; // height + dimsOrder[2] = 3; // width + globalThreads_x = (dstGenericDescPtr->strides[3] + 7) >> 3; // W - width (x direction) - vectorized for 8 element loads/stores per channel + globalThreads_y = dstGenericDescPtr->dims[3]; // H - height (y direction) + globalThreads_z = dstGenericDescPtr->dims[2]; // D - depth (z direction) + } for(int batchCount = 0; batchCount < dstGenericDescPtr->dims[0]; batchCount++) { + Rpp32s *anchor = &anchorTensor[batchCount * numDims]; + Rpp32s *shape = &shapeTensor[batchCount * numDims]; + Rpp32u *roi = roiTensor + batchCount * numDims * 2; + Rpp32s *length = reinterpret_cast(&roi[numDims]); + Rpp32u maxDepth = std::min(shape[dimsOrder[0]], length[dimsOrder[0]] - anchor[dimsOrder[0]]); + Rpp32u maxHeight = std::min(shape[dimsOrder[1]], length[dimsOrder[1]] - anchor[dimsOrder[1]]); + Rpp32u maxWidth = std::min(shape[dimsOrder[2]], length[dimsOrder[2]] - anchor[dimsOrder[2]]); + if (dstGenericDescPtr->layout == RpptLayout::NCDHW) + { + T *srcPtrTemp = srcPtr + (batchCount * srcGenericDescPtr->strides[0]) + anchor[1] * srcGenericDescPtr->strides[2] + anchor[2] * srcGenericDescPtr->strides[3] + anchor[3]; + T *dstPtrTemp = dstPtr + (batchCount * dstGenericDescPtr->strides[0]); + hipLaunchKernelGGL(slice_ncdhw_hip_tensor, + dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + 0, + handle.GetStream(), + srcPtrTemp, + make_uint3(srcGenericDescPtr->strides[1], srcGenericDescPtr->strides[2], srcGenericDescPtr->strides[3]), + dstPtrTemp, + make_uint3(dstGenericDescPtr->strides[1], dstGenericDescPtr->strides[2], dstGenericDescPtr->strides[3]), + dstGenericDescPtr->dims[1], + make_uint3(maxDepth, maxHeight, maxWidth)); + } + else if (dstGenericDescPtr->layout == RpptLayout::NDHWC) + { + T *srcPtrTemp = srcPtr + (batchCount * srcGenericDescPtr->strides[0]) + anchor[0] * srcGenericDescPtr->strides[1] + anchor[1] * srcGenericDescPtr->strides[2] + anchor[2]; + T *dstPtrTemp = dstPtr + (batchCount * dstGenericDescPtr->strides[0]); + hipLaunchKernelGGL(slice_ndhwc_hip_tensor, + dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), + 
dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + 0, + handle.GetStream(), + srcPtrTemp, + make_uint2(srcGenericDescPtr->strides[1], srcGenericDescPtr->strides[2]), + dstPtrTemp, + make_uint2(dstGenericDescPtr->strides[1], dstGenericDescPtr->strides[2]), + make_uint3(maxDepth, maxHeight, maxWidth)); + } + } + } + else if (numDims == 3) + { + // set the dimsOrder and globalthreads values required for NHWC layout + Rpp32s dimsOrder[2] = {0, 1}; + int globalThreads_x = (dstGenericDescPtr->strides[1] / 3 + 7) >> 3; // W - width (x direction) - vectorized for 8 element loads/stores per channel + int globalThreads_y = dstGenericDescPtr->dims[1]; // H - height (y direction) + int globalThreads_z = 1; + + // change the dimsOrder and globalthreads values if layout is NCHW + if (dstGenericDescPtr->layout == RpptLayout::NCHW) + { + dimsOrder[0] = 1; // height + dimsOrder[1] = 2; // width + globalThreads_x = (dstGenericDescPtr->strides[2] + 7) >> 3; // W - width (x direction) - vectorized for 8 element loads/stores per channel + globalThreads_y = dstGenericDescPtr->dims[2]; // H - height (y direction) + globalThreads_z = 1; + } + + for(int batchCount = 0; batchCount < dstGenericDescPtr->dims[0]; batchCount++) + { + Rpp32s *anchor = &anchorTensor[batchCount * numDims]; + Rpp32s *shape = &shapeTensor[batchCount * numDims]; + Rpp32u *roi = roiTensor + batchCount * numDims * 2; + Rpp32s *length = reinterpret_cast(&roi[numDims]); + Rpp32u maxHeight = std::min(shape[dimsOrder[0]], length[dimsOrder[0]] - anchor[dimsOrder[0]]); + Rpp32u maxWidth = std::min(shape[dimsOrder[1]], length[dimsOrder[1]] - anchor[dimsOrder[1]]); + if (dstGenericDescPtr->layout == RpptLayout::NCHW) + { + T *srcPtrTemp = srcPtr + (batchCount * srcGenericDescPtr->strides[0]) + anchor[1] * srcGenericDescPtr->strides[2] + anchor[2]; + T *dstPtrTemp = dstPtr + (batchCount * dstGenericDescPtr->strides[0]); + hipLaunchKernelGGL(slice_ncdhw_hip_tensor, + dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, 1), + 0, + handle.GetStream(), + srcPtrTemp, + make_uint3(srcGenericDescPtr->strides[1], 0, srcGenericDescPtr->strides[2]), + dstPtrTemp, + make_uint3(dstGenericDescPtr->strides[1], 0, dstGenericDescPtr->strides[2]), + dstGenericDescPtr->dims[1], + make_uint3(1, maxHeight, maxWidth)); + } + else if (dstGenericDescPtr->layout == RpptLayout::NHWC) + { + T *srcPtrTemp = srcPtr + (batchCount * srcGenericDescPtr->strides[0]) + anchor[0] * srcGenericDescPtr->strides[1] + anchor[1]; + T *dstPtrTemp = dstPtr + (batchCount * dstGenericDescPtr->strides[0]); + hipLaunchKernelGGL(slice_ndhwc_hip_tensor, + dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), globalThreads_z), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, 1), + 0, + handle.GetStream(), + srcPtrTemp, + make_uint2(1, srcGenericDescPtr->strides[1]), + dstPtrTemp, + make_uint2(1, dstGenericDescPtr->strides[1]), + make_uint3(1, maxHeight, maxWidth)); + } + } + } + else if (numDims == 2) + { + // NHW + int globalThreads_x = (dstGenericDescPtr->strides[1] + 7) >> 3; // W - width (x direction) - vectorized for 8 element loads/stores per channel + int globalThreads_y = dstGenericDescPtr->dims[1]; // H - height (y direction) + int globalThreads_z = 1; + for(int batchCount = 0; batchCount < dstGenericDescPtr->dims[0]; batchCount++) + { + Rpp32s *anchor = &anchorTensor[batchCount * numDims]; + Rpp32s *shape = 
&shapeTensor[batchCount * numDims]; + Rpp32u *roi = roiTensor + batchCount * numDims * 2; + Rpp32s *length = reinterpret_cast(&roi[numDims]); + Rpp32u maxHeight = std::min(shape[0], length[0] - anchor[0]); + Rpp32u maxWidth = std::min(shape[1], length[1] - anchor[1]); + T *srcPtrTemp = srcPtr + (batchCount * srcGenericDescPtr->strides[0]) + anchor[0] * srcGenericDescPtr->strides[2] + anchor[1]; + T *dstPtrTemp = dstPtr + (batchCount * dstGenericDescPtr->strides[0]); + hipLaunchKernelGGL(slice_ncdhw_hip_tensor, dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), - dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, 1), 0, handle.GetStream(), - srcPtr + (batchCount * srcGenericDescPtr->strides[0]), - make_uint3(srcGenericDescPtr->strides[1], srcGenericDescPtr->strides[2], srcGenericDescPtr->strides[3]), - dstPtr + (batchCount * dstGenericDescPtr->strides[0]), - make_uint3(dstGenericDescPtr->strides[1], dstGenericDescPtr->strides[2], dstGenericDescPtr->strides[3]), - dstGenericDescPtr->dims[1], - &roiGenericPtrSrc[batchCount]); + srcPtrTemp, + make_uint3(0, 0, srcGenericDescPtr->strides[1]), + dstPtrTemp, + make_uint3(0, 0, dstGenericDescPtr->strides[1]), + 1, + make_uint3(1, maxHeight, maxWidth)); } } - else if (dstGenericDescPtr->layout == RpptLayout::NDHWC) + else if (numDims == 1) { - int globalThreads_x = (dstGenericDescPtr->strides[2] / 3 + 7) >> 3; // W - width (x direction) - vectorized for 8 element loads/stores per channel - int globalThreads_y = dstGenericDescPtr->dims[2]; // H - height (y direction) - int globalThreads_z = dstGenericDescPtr->dims[1]; // D - depth (z direction) - + int globalThreads_x = (dstGenericDescPtr->strides[0] + 7) >> 3; // W - width (x direction) - vectorized for 8 element loads/stores per channel + int globalThreads_y = 1; + int globalThreads_z = 1; for(int batchCount = 0; batchCount < dstGenericDescPtr->dims[0]; batchCount++) { - hipLaunchKernelGGL(slice_ndhwc_hip_tensor, + Rpp32s *anchor = &anchorTensor[batchCount * numDims]; + Rpp32s *shape = &shapeTensor[batchCount * numDims]; + Rpp32u *roi = roiTensor + batchCount * numDims * 2; + Rpp32s *length = reinterpret_cast(&roi[numDims]); + Rpp32u maxLength = std::min(shape[0], length[0] - anchor[0]); + T *srcPtrTemp = srcPtr + (batchCount * srcGenericDescPtr->strides[0]) + anchor[0]; + T *dstPtrTemp = dstPtr + (batchCount * dstGenericDescPtr->strides[0]); + + hipLaunchKernelGGL(slice_ncdhw_hip_tensor, dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), - dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + dim3(LOCAL_THREADS_X, 1, 1), 0, handle.GetStream(), - srcPtr + (batchCount * srcGenericDescPtr->strides[0]), - make_uint2(srcGenericDescPtr->strides[1], srcGenericDescPtr->strides[2]), - dstPtr + (batchCount * dstGenericDescPtr->strides[0]), - make_uint2(dstGenericDescPtr->strides[1], dstGenericDescPtr->strides[2]), - &roiGenericPtrSrc[batchCount]); + srcPtrTemp, + make_uint3(0, 0, 1), + dstPtrTemp, + make_uint3(0, 0, 1), + 1, + make_uint3(1, 1, maxLength)); } } diff --git a/src/modules/rppt_tensor_audio_augmentations.cpp b/src/modules/rppt_tensor_audio_augmentations.cpp index b629d1590..e20211ec1 100644 --- a/src/modules/rppt_tensor_audio_augmentations.cpp +++ b/src/modules/rppt_tensor_audio_augmentations.cpp @@ -32,8 +32,8 @@ SOFTWARE. 
diff --git a/src/modules/rppt_tensor_audio_augmentations.cpp b/src/modules/rppt_tensor_audio_augmentations.cpp
index b629d1590..e20211ec1 100644
--- a/src/modules/rppt_tensor_audio_augmentations.cpp
+++ b/src/modules/rppt_tensor_audio_augmentations.cpp
@@ -32,8 +32,8 @@ SOFTWARE.
 RppStatus rppt_non_silent_region_detection_host(RppPtr_t srcPtr,
                                                 RpptDescPtr srcDescPtr,
                                                 Rpp32s *srcLengthTensor,
-                                                Rpp32f *detectedIndexTensor,
-                                                Rpp32f *detectionLengthTensor,
+                                                Rpp32s *detectedIndexTensor,
+                                                Rpp32s *detectionLengthTensor,
                                                 Rpp32f cutOffDB,
                                                 Rpp32s windowLength,
                                                 Rpp32f referencePower,
diff --git a/src/modules/rppt_tensor_geometric_augmentations.cpp b/src/modules/rppt_tensor_geometric_augmentations.cpp
index 45a0d5221..d758aa676 100644
--- a/src/modules/rppt_tensor_geometric_augmentations.cpp
+++ b/src/modules/rppt_tensor_geometric_augmentations.cpp
@@ -1050,43 +1050,54 @@
 RppStatus rppt_slice_host(RppPtr_t srcPtr,
                           RpptGenericDescPtr srcGenericDescPtr,
                           RppPtr_t dstPtr,
                           RpptGenericDescPtr dstGenericDescPtr,
-                          RpptROI3DPtr roiGenericPtrSrc,
-                          RpptRoi3DType roiType,
+                          Rpp32s *anchorTensor,
+                          Rpp32s *shapeTensor,
+                          RppPtr_t fillValue,
+                          bool enablePadding,
+                          Rpp32u *roiTensor,
                           rppHandle_t rppHandle)
 {
+    if ((srcGenericDescPtr->dataType != RpptDataType::F32) && (srcGenericDescPtr->dataType != RpptDataType::U8)) return RPP_ERROR_INVALID_SRC_DATATYPE;
+    if ((dstGenericDescPtr->dataType != RpptDataType::F32) && (dstGenericDescPtr->dataType != RpptDataType::U8)) return RPP_ERROR_INVALID_DST_DATATYPE;
+    if (srcGenericDescPtr->layout != dstGenericDescPtr->layout) return RPP_ERROR_LAYOUT_MISMATCH;
+
     RppLayoutParams layoutParams;
     if ((srcGenericDescPtr->layout == RpptLayout::NCDHW) && (dstGenericDescPtr->layout == RpptLayout::NCDHW))
         layoutParams = get_layout_params(srcGenericDescPtr->layout, srcGenericDescPtr->dims[1]);
     else if ((srcGenericDescPtr->layout == RpptLayout::NDHWC) && (dstGenericDescPtr->layout == RpptLayout::NDHWC))
         layoutParams = get_layout_params(srcGenericDescPtr->layout, srcGenericDescPtr->dims[4]);
-
-    if ((srcGenericDescPtr->dataType != RpptDataType::F32) && (srcGenericDescPtr->dataType != RpptDataType::U8)) return RPP_ERROR_INVALID_SRC_DATATYPE;
-    if ((dstGenericDescPtr->dataType != RpptDataType::F32) && (dstGenericDescPtr->dataType != RpptDataType::U8)) return RPP_ERROR_INVALID_DST_DATATYPE;
-    if ((srcGenericDescPtr->layout != RpptLayout::NCDHW) && (srcGenericDescPtr->layout != RpptLayout::NDHWC)) return RPP_ERROR_INVALID_SRC_LAYOUT;
-    if ((dstGenericDescPtr->layout != RpptLayout::NCDHW) && (dstGenericDescPtr->layout != RpptLayout::NDHWC)) return RPP_ERROR_INVALID_DST_LAYOUT;
-    if (srcGenericDescPtr->layout != dstGenericDescPtr->layout) return RPP_ERROR_INVALID_ARGUMENTS;
+    else if ((srcGenericDescPtr->layout == RpptLayout::NCHW) && (dstGenericDescPtr->layout == RpptLayout::NCHW))
+        layoutParams = get_layout_params(srcGenericDescPtr->layout, srcGenericDescPtr->dims[1]);
+    else if ((srcGenericDescPtr->layout == RpptLayout::NHWC) && (dstGenericDescPtr->layout == RpptLayout::NHWC))
+        layoutParams = get_layout_params(srcGenericDescPtr->layout, srcGenericDescPtr->dims[3]);
 
     if ((srcGenericDescPtr->dataType == RpptDataType::F32) && (dstGenericDescPtr->dataType == RpptDataType::F32))
     {
-        slice_f32_f32_host_tensor((Rpp32f*) (static_cast<Rpp8u*>(srcPtr) + srcGenericDescPtr->offsetInBytes),
-                                  srcGenericDescPtr,
-                                  (Rpp32f*) (static_cast<Rpp8u*>(dstPtr) + dstGenericDescPtr->offsetInBytes),
-                                  dstGenericDescPtr,
-                                  roiGenericPtrSrc,
-                                  roiType,
-                                  layoutParams,
-                                  rpp::deref(rppHandle));
+        slice_host_tensor(reinterpret_cast<Rpp32f *>(static_cast<Rpp8u *>(srcPtr) + srcGenericDescPtr->offsetInBytes),
+                          srcGenericDescPtr,
+                          reinterpret_cast<Rpp32f *>(static_cast<Rpp8u *>(dstPtr) + dstGenericDescPtr->offsetInBytes),
+                          dstGenericDescPtr,
+                          anchorTensor,
+                          shapeTensor,
+                          static_cast<Rpp32f *>(fillValue),
+                          enablePadding,
+                          roiTensor,
+                          layoutParams,
+                          rpp::deref(rppHandle));
     }
     else if ((srcGenericDescPtr->dataType == RpptDataType::U8) && (dstGenericDescPtr->dataType == RpptDataType::U8))
     {
-        slice_u8_u8_host_tensor(static_cast<Rpp8u*>(srcPtr) + srcGenericDescPtr->offsetInBytes,
-                                srcGenericDescPtr,
-                                static_cast<Rpp8u*>(dstPtr) + dstGenericDescPtr->offsetInBytes,
-                                dstGenericDescPtr,
-                                roiGenericPtrSrc,
-                                roiType,
-                                layoutParams,
-                                rpp::deref(rppHandle));
+        slice_host_tensor(static_cast<Rpp8u *>(srcPtr) + srcGenericDescPtr->offsetInBytes,
+                          srcGenericDescPtr,
+                          static_cast<Rpp8u *>(dstPtr) + dstGenericDescPtr->offsetInBytes,
+                          dstGenericDescPtr,
+                          anchorTensor,
+                          shapeTensor,
+                          static_cast<Rpp8u *>(fillValue),
+                          enablePadding,
+                          roiTensor,
+                          layoutParams,
+                          rpp::deref(rppHandle));
     }
 
     return RPP_SUCCESS;
@@ -1848,24 +1859,29 @@ RppStatus rppt_slice_gpu(RppPtr_t srcPtr,
                          RpptGenericDescPtr srcGenericDescPtr,
                          RppPtr_t dstPtr,
                          RpptGenericDescPtr dstGenericDescPtr,
-                         RpptROI3DPtr roiGenericPtrSrc,
-                         RpptRoi3DType roiType,
+                         Rpp32s *anchorTensor,
+                         Rpp32s *shapeTensor,
+                         RppPtr_t fillValue,
+                         bool enablePadding,
+                         Rpp32u *roiTensor,
                          rppHandle_t rppHandle)
 {
 #ifdef HIP_COMPILE
-    if ((srcGenericDescPtr->layout != RpptLayout::NCDHW) && (srcGenericDescPtr->layout != RpptLayout::NDHWC)) return RPP_ERROR_INVALID_SRC_LAYOUT;
-    if ((dstGenericDescPtr->layout != RpptLayout::NCDHW) && (dstGenericDescPtr->layout != RpptLayout::NDHWC)) return RPP_ERROR_INVALID_DST_LAYOUT;
-    if (srcGenericDescPtr->layout != dstGenericDescPtr->layout) return RPP_ERROR_INVALID_ARGUMENTS;
     if ((srcGenericDescPtr->dataType != RpptDataType::F32) && (srcGenericDescPtr->dataType != RpptDataType::U8)) return RPP_ERROR_INVALID_SRC_DATATYPE;
     if ((dstGenericDescPtr->dataType != RpptDataType::F32) && (dstGenericDescPtr->dataType != RpptDataType::U8)) return RPP_ERROR_INVALID_DST_DATATYPE;
+    if (srcGenericDescPtr->layout != dstGenericDescPtr->layout) return RPP_ERROR_LAYOUT_MISMATCH;
 
     if ((srcGenericDescPtr->dataType == RpptDataType::F32) && (dstGenericDescPtr->dataType == RpptDataType::F32))
     {
-        hip_exec_slice_tensor((Rpp32f*) (static_cast<Rpp8u*>(srcPtr) + srcGenericDescPtr->offsetInBytes),
+        hip_exec_slice_tensor(reinterpret_cast<Rpp32f *>(static_cast<Rpp8u *>(srcPtr) + srcGenericDescPtr->offsetInBytes),
                               srcGenericDescPtr,
-                              (Rpp32f*) (static_cast<Rpp8u*>(dstPtr) + dstGenericDescPtr->offsetInBytes),
+                              reinterpret_cast<Rpp32f *>(static_cast<Rpp8u *>(dstPtr) + dstGenericDescPtr->offsetInBytes),
                               dstGenericDescPtr,
-                              roiGenericPtrSrc,
+                              anchorTensor,
+                              shapeTensor,
+                              static_cast<Rpp32f *>(fillValue),
+                              enablePadding,
+                              roiTensor,
                               rpp::deref(rppHandle));
     }
     else if ((srcGenericDescPtr->dataType == RpptDataType::U8) && (dstGenericDescPtr->dataType == RpptDataType::U8))
@@ -1874,7 +1890,11 @@ RppStatus rppt_slice_gpu(RppPtr_t srcPtr,
                              srcGenericDescPtr,
                              static_cast<Rpp8u*>(dstPtr) + dstGenericDescPtr->offsetInBytes,
                              dstGenericDescPtr,
-                             roiGenericPtrSrc,
+                             anchorTensor,
+                             shapeTensor,
+                             static_cast<Rpp8u *>(fillValue),
+                             enablePadding,
+                             roiTensor,
                              rpp::deref(rppHandle));
     }
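Both entry points now validate the datatypes first and return the renamed layout status when source and destination disagree. A hedged caller-side sketch of handling that path (variable names follow the test code below; the error text is illustrative):

#include <cstdio>

RppStatus status = rppt_slice_gpu(d_input, srcGenericDescPtr, d_output, dstGenericDescPtr,
                                  anchorTensor, shapeTensor, &fillValue, enablePadding,
                                  roiTensor, handle);
if (status == RPP_ERROR_LAYOUT_MISMATCH)
    fprintf(stderr, "slice: source and destination layouts must match\n");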
diff --git a/utilities/test_suite/HIP/Tensor_hip.cpp b/utilities/test_suite/HIP/Tensor_hip.cpp
index f6c9276ef..e7b880433 100644
--- a/utilities/test_suite/HIP/Tensor_hip.cpp
+++ b/utilities/test_suite/HIP/Tensor_hip.cpp
@@ -338,6 +338,14 @@ int main(int argc, char **argv)
         CHECK_RETURN_STATUS(hipHostMalloc(&reductionFuncResultArr, reductionFuncResultArrLength * bitDepthByteSize));
     }
 
+    // create generic descriptor and params in case of slice
+    RpptGenericDesc descriptor3D;
+    RpptGenericDescPtr descriptorPtr3D = &descriptor3D;
+    Rpp32s *anchorTensor = NULL, *shapeTensor = NULL;
+    Rpp32u *roiTensor = NULL;
+    if(testCase == 90)
+        set_generic_descriptor_slice(srcDescPtr, descriptorPtr3D, batchSize);
+
     // Allocate hip memory for src/dst
     CHECK_RETURN_STATUS(hipMalloc(&d_input, inputBufferSize));
     CHECK_RETURN_STATUS(hipMalloc(&d_output, outputBufferSize));
@@ -348,6 +356,7 @@ int main(int argc, char **argv)
     if(testCase == 82)
         CHECK_RETURN_STATUS(hipHostMalloc(&roiPtrInputCropRegion, 4 * sizeof(RpptROI)));
 
+    // create cropRoi and patchRoi in case of crop_and_patch
     RpptROI *cropRoi, *patchRoi;
     if(testCase == 33)
     {
@@ -1137,6 +1146,28 @@
             break;
         }
+        case 90:
+        {
+            testCaseName = "slice";
+            Rpp32u numDims = descriptorPtr3D->numDims - 1; // exclude batchSize from input dims
+            if(anchorTensor == NULL)
+                CHECK_RETURN_STATUS(hipHostMalloc(&anchorTensor, batchSize * numDims * sizeof(Rpp32s)));
+            if(shapeTensor == NULL)
+                CHECK_RETURN_STATUS(hipHostMalloc(&shapeTensor, batchSize * numDims * sizeof(Rpp32s)));
+            if(roiTensor == NULL)
+                CHECK_RETURN_STATUS(hipHostMalloc(&roiTensor, batchSize * numDims * 2 * sizeof(Rpp32u)));
+            bool enablePadding = false;
+            auto fillValue = 0;
+            init_slice(descriptorPtr3D, roiTensorPtrSrc, roiTensor, anchorTensor, shapeTensor);
+
+            startWallTime = omp_get_wtime();
+            if((inputBitDepth == 0 || inputBitDepth == 2) && srcDescPtr->layout == dstDescPtr->layout)
+                rppt_slice_gpu(d_input, descriptorPtr3D, d_output, descriptorPtr3D, anchorTensor, shapeTensor, &fillValue, enablePadding, roiTensor, handle);
+            else
+                missingFuncFlag = 1;
+
+            break;
+        }
         default:
             missingFuncFlag = 1;
             break;
@@ -1232,6 +1263,42 @@ int main(int argc, char **argv)
             refFile.close();
         }
 
+        // if test case is slice and qaFlag is set, update the dstImgSizes with shapeTensor values
+        // for output display and comparison purposes
+        if (testCase == 90)
+        {
+            if (dstDescPtr->layout == RpptLayout::NCHW)
+            {
+                if (dstDescPtr->c == 3)
+                {
+                    for(int i = 0; i < batchSize; i++)
+                    {
+                        int idx1 = i * 3;
+                        dstImgSizes[i].height = shapeTensor[idx1 + 1];
+                        dstImgSizes[i].width = shapeTensor[idx1 + 2];
+                    }
+                }
+                else
+                {
+                    for(int i = 0; i < batchSize; i++)
+                    {
+                        int idx1 = i * 2;
+                        dstImgSizes[i].height = shapeTensor[idx1];
+                        dstImgSizes[i].width = shapeTensor[idx1 + 1];
+                    }
+                }
+            }
+            else if (dstDescPtr->layout == RpptLayout::NHWC)
+            {
+                for(int i = 0; i < batchSize; i++)
+                {
+                    int idx1 = i * 3;
+                    dstImgSizes[i].height = shapeTensor[idx1];
+                    dstImgSizes[i].width = shapeTensor[idx1 + 1];
+                }
+            }
+        }
+
         /*Compare the output of the function with golden outputs only if
         1.QA Flag is set
         2.input bit depth 0 (Input U8 && Output U8)
@@ -1294,6 +1361,12 @@ int main(int argc, char **argv)
     }
     if (reductionTypeCase)
         CHECK_RETURN_STATUS(hipHostFree(reductionFuncResultArr));
+    if(anchorTensor != NULL)
+        CHECK_RETURN_STATUS(hipHostFree(anchorTensor));
+    if(shapeTensor != NULL)
+        CHECK_RETURN_STATUS(hipHostFree(shapeTensor));
+    if(roiTensor != NULL)
+        CHECK_RETURN_STATUS(hipHostFree(roiTensor));
     free(input);
     free(input_second);
     free(output);
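The allocation sizes in case 90 follow directly from the generic descriptor. Spelled out for an illustrative batchSize of 2 with a 3-channel 2D input (so numDims = 3):

    // anchorTensor, shapeTensor : 2 * 3     = 6  Rpp32s values (one entry per dimension, per image)
    // roiTensor                 : 2 * 3 * 2 = 12 Rpp32u values (per-image begins, then per-image lengths)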
diff --git a/utilities/test_suite/HIP/Tensor_voxel_hip.cpp b/utilities/test_suite/HIP/Tensor_voxel_hip.cpp
index b39f461cc..44f2dea72 100644
--- a/utilities/test_suite/HIP/Tensor_voxel_hip.cpp
+++ b/utilities/test_suite/HIP/Tensor_voxel_hip.cpp
@@ -146,6 +146,10 @@ int main(int argc, char * argv[])
     void *pinnedMemArgs;
     CHECK_RETURN_STATUS(hipHostMalloc(&pinnedMemArgs, 2 * noOfFiles * sizeof(Rpp32f)));
 
+    // arguments required for slice
+    Rpp32s *anchorTensor = NULL, *shapeTensor = NULL;
+    Rpp32u *roiTensor = NULL;
+
     rppHandle_t handle;
     hipStream_t stream;
     CHECK_RETURN_STATUS(hipStreamCreate(&stream));
@@ -254,12 +258,21 @@ int main(int argc, char * argv[])
         case 1:
         {
             testCaseName = "slice";
+            if(anchorTensor == NULL)
+                CHECK_RETURN_STATUS(hipHostMalloc(&anchorTensor, batchSize * 4 * sizeof(Rpp32s)));
+            if(shapeTensor == NULL)
+                CHECK_RETURN_STATUS(hipHostMalloc(&shapeTensor, batchSize * 4 * sizeof(Rpp32s)));
+            if(roiTensor == NULL)
+                CHECK_RETURN_STATUS(hipHostMalloc(&roiTensor, batchSize * 8 * sizeof(Rpp32u)));
+            bool enablePadding = false;
+            auto fillValue = 0;
+            init_slice_voxel(descriptorPtr3D, roiGenericSrcPtr, roiTensor, anchorTensor, shapeTensor);
 
             startWallTime = omp_get_wtime();
             if (inputBitDepth == 0)
-                rppt_slice_gpu(d_inputU8, descriptorPtr3D, d_outputU8, descriptorPtr3D, roiGenericSrcPtr, roiTypeSrc, handle);
+                rppt_slice_gpu(d_inputU8, descriptorPtr3D, d_outputU8, descriptorPtr3D, anchorTensor, shapeTensor, &fillValue, enablePadding, roiTensor, handle);
             else if(inputBitDepth == 2)
-                rppt_slice_gpu(d_inputF32, descriptorPtr3D, d_outputF32, descriptorPtr3D, roiGenericSrcPtr, roiTypeSrc, handle);
+                rppt_slice_gpu(d_inputF32, descriptorPtr3D, d_outputF32, descriptorPtr3D, anchorTensor, shapeTensor, &fillValue, enablePadding, roiTensor, handle);
             else
                 missingFuncFlag = 1;
 
@@ -413,6 +426,39 @@ int main(int argc, char * argv[])
             outputF32[i] = static_cast<Rpp32f>(outputU8[i]);
     }
 
+    // if test case is slice and qaFlag is set, update the ROI with shapeTensor values
+    // for output display and comparison purposes
+    if(testCase == 1)
+    {
+        // update the roi for comparison with the shapeTensor values
+        if (descriptorPtr3D->layout == RpptLayout::NCDHW)
+        {
+            for(int i = 0; i < batchSize; i++)
+            {
+                int idx1 = i * 4;
+                roiGenericSrcPtr[i].xyzwhdROI.xyz.x = 0;
+                roiGenericSrcPtr[i].xyzwhdROI.xyz.y = 0;
+                roiGenericSrcPtr[i].xyzwhdROI.xyz.z = 0;
+                roiGenericSrcPtr[i].xyzwhdROI.roiDepth = shapeTensor[idx1 + 1];
+                roiGenericSrcPtr[i].xyzwhdROI.roiHeight = shapeTensor[idx1 + 2];
+                roiGenericSrcPtr[i].xyzwhdROI.roiWidth = shapeTensor[idx1 + 3];
+            }
+        }
+        else if(descriptorPtr3D->layout == RpptLayout::NDHWC)
+        {
+            for(int i = 0; i < batchSize; i++)
+            {
+                int idx1 = i * 4;
+                roiGenericSrcPtr[i].xyzwhdROI.xyz.x = 0;
+                roiGenericSrcPtr[i].xyzwhdROI.xyz.y = 0;
+                roiGenericSrcPtr[i].xyzwhdROI.xyz.z = 0;
+                roiGenericSrcPtr[i].xyzwhdROI.roiDepth = shapeTensor[idx1];
+                roiGenericSrcPtr[i].xyzwhdROI.roiHeight = shapeTensor[idx1 + 1];
+                roiGenericSrcPtr[i].xyzwhdROI.roiWidth = shapeTensor[idx1 + 2];
+            }
+        }
+    }
+
     /*Compare the output of the function with golden outputs only if
     1.QA Flag is set
     2.input bit depth 2 (F32)*/
@@ -502,6 +548,12 @@ int main(int argc, char * argv[])
     CHECK_RETURN_STATUS(hipHostFree(pinnedMemArgs));
     CHECK_RETURN_STATUS(hipFree(d_inputF32));
     CHECK_RETURN_STATUS(hipFree(d_outputF32));
+    if(anchorTensor != NULL)
+        CHECK_RETURN_STATUS(hipHostFree(anchorTensor));
+    if(shapeTensor != NULL)
+        CHECK_RETURN_STATUS(hipHostFree(shapeTensor));
+    if(roiTensor != NULL)
+        CHECK_RETURN_STATUS(hipHostFree(roiTensor));
     if(inputBitDepth == 0)
     {
         if(inputU8 != NULL)
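On the voxel path every sample exposes four sliceable dimensions, so anchors and shapes carry 4 entries per sample and the ROI carries 8. A worked example of what init_slice_voxel produces for one NCDHW sample whose source ROI is a full 1x64x128x128 volume (sizes illustrative; D/H/W are halved, channels are kept):

    Rpp32u roiTensor[8]    = {0, 0, 0, 0, 1, 64, 128, 128}; // begins (c, z, y, x), then lengths (c, d, h, w)
    Rpp32s anchorTensor[4] = {0, 0, 0, 0};
    Rpp32s shapeTensor[4]  = {1, 32, 64, 64};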
diff --git a/utilities/test_suite/HIP/runTests.py b/utilities/test_suite/HIP/runTests.py
index 76fb7d15c..28db84bee 100644
--- a/utilities/test_suite/HIP/runTests.py
+++ b/utilities/test_suite/HIP/runTests.py
@@ -39,7 +39,7 @@
 outFolderPath = os.getcwd()
 buildFolderPath = os.getcwd()
 caseMin = 0
-caseMax = 89
+caseMax = 90
 
 # Get a list of log files based on a flag for preserving output
 def get_log_file_list(preserveOutput):
@@ -327,7 +327,7 @@ def rpp_test_suite_parser_and_validator():
     subprocess.run(["make", "-j16"], cwd=".")    # nosec
 
 # List of cases supported
-supportedCaseList = ['0', '1', '2', '4', '8', '13', '20', '21', '23', '29', '30', '31', '33', '34', '36', '37', '38', '39', '45', '46', '54', '61', '63', '65', '68', '70', '80', '82', '83', '84', '85', '86', '87', '88', '89']
+supportedCaseList = ['0', '1', '2', '4', '8', '13', '20', '21', '23', '29', '30', '31', '33', '34', '36', '37', '38', '39', '45', '46', '54', '61', '63', '65', '68', '70', '80', '82', '83', '84', '85', '86', '87', '88', '89', '90']
 
 # Create folders based on testType and profilingOption
 if testType == 1 and profilingOption == "YES":
diff --git a/utilities/test_suite/HOST/Tensor_host.cpp b/utilities/test_suite/HOST/Tensor_host.cpp
index 19aa5452f..15a94095d 100644
--- a/utilities/test_suite/HOST/Tensor_host.cpp
+++ b/utilities/test_suite/HOST/Tensor_host.cpp
@@ -339,6 +339,15 @@ int main(int argc, char **argv)
         }
     }
 
+    // create generic descriptor and params in case of slice
+    RpptGenericDesc descriptor3D;
+    RpptGenericDescPtr descriptorPtr3D = &descriptor3D;
+    Rpp32s *anchorTensor = NULL, *shapeTensor = NULL;
+    Rpp32u *roiTensor = NULL;
+    if(testCase == 90)
+        set_generic_descriptor_slice(srcDescPtr, descriptorPtr3D, batchSize);
+
+    // create cropRoi and patchRoi in case of crop_and_patch
     RpptROI *cropRoi, *patchRoi;
     if(testCase == 33)
     {
@@ -1171,6 +1180,30 @@
             break;
         }
+        case 90:
+        {
+            testCaseName = "slice";
+            Rpp32u numDims = descriptorPtr3D->numDims - 1; // exclude batchSize from input dims
+            if(anchorTensor == NULL)
+                anchorTensor = static_cast<Rpp32s*>(calloc(batchSize * numDims, sizeof(Rpp32s)));
+            if(shapeTensor == NULL)
+                shapeTensor = static_cast<Rpp32s*>(calloc(batchSize * numDims, sizeof(Rpp32s)));
+            if(roiTensor == NULL)
+                roiTensor = static_cast<Rpp32u*>(calloc(batchSize * numDims * 2, sizeof(Rpp32u)));
+            bool enablePadding = false;
+            auto fillValue = 0;
+            init_slice(descriptorPtr3D, roiTensorPtrSrc, roiTensor, anchorTensor, shapeTensor);
+
+            startWallTime = omp_get_wtime();
+            startCpuTime = clock();
+
+            if((inputBitDepth == 0 || inputBitDepth == 2) && srcDescPtr->layout == dstDescPtr->layout)
+                rppt_slice_host(input, descriptorPtr3D, output, descriptorPtr3D, anchorTensor, shapeTensor, &fillValue, enablePadding, roiTensor, handle);
+            else
+                missingFuncFlag = 1;
+
+            break;
+        }
         default:
             missingFuncFlag = 1;
             break;
@@ -1268,6 +1301,42 @@ int main(int argc, char **argv)
             refFile.close();
         }
 
+        // if test case is slice and qaFlag is set, update the dstImgSizes with shapeTensor values
+        // for output display and comparison purposes
+        if (testCase == 90)
+        {
+            if (dstDescPtr->layout == RpptLayout::NCHW)
+            {
+                if (dstDescPtr->c == 3)
+                {
+                    for(int i = 0; i < batchSize; i++)
+                    {
+                        int idx1 = i * 3;
+                        dstImgSizes[i].height = shapeTensor[idx1 + 1];
+                        dstImgSizes[i].width = shapeTensor[idx1 + 2];
+                    }
+                }
+                else
+                {
+                    for(int i = 0; i < batchSize; i++)
+                    {
+                        int idx1 = i * 2;
+                        dstImgSizes[i].height = shapeTensor[idx1];
+                        dstImgSizes[i].width = shapeTensor[idx1 + 1];
+                    }
+                }
+            }
+            else if (dstDescPtr->layout == RpptLayout::NHWC)
+            {
+                for(int i = 0; i < batchSize; i++)
+                {
+                    int idx1 = i * 3;
+                    dstImgSizes[i].height = shapeTensor[idx1];
+                    dstImgSizes[i].width = shapeTensor[idx1 + 1];
+                }
+            }
+        }
+
         /*Compare the output of the function with golden outputs only if
         1.QA Flag is set
         2.input bit depth 0 (Input U8 && Output U8)
@@ -1324,6 +1393,12 @@ int main(int argc, char **argv)
     free(roiTensorPtrSrc);
     free(roiTensorPtrDst);
     free(dstImgSizes);
+    if(anchorTensor != NULL)
+        free(anchorTensor);
+    if(shapeTensor != NULL)
+        free(shapeTensor);
+    if(roiTensor != NULL)
+        free(roiTensor);
     free(input);
     free(inputu8);
     free(inputu8Second);
diff --git a/utilities/test_suite/HOST/Tensor_host_audio.cpp b/utilities/test_suite/HOST/Tensor_host_audio.cpp
index 35ce3a752..ac05bcc90 100644
--- a/utilities/test_suite/HOST/Tensor_host_audio.cpp
+++ b/utilities/test_suite/HOST/Tensor_host_audio.cpp
@@ -109,6 +109,19 @@ int main(int argc, char **argv)
     maxDstChannels = 1;
     set_audio_descriptor_dims_and_strides(dstDescPtr, batchSize, maxDstHeight, maxDstWidth, maxDstChannels, offsetInBytes);
 
+    // create generic descriptor in case of slice
+    RpptGenericDesc descriptor3D;
+    RpptGenericDescPtr descriptorPtr3D = &descriptor3D;
+    if(testCase == 5)
+    {
+        descriptorPtr3D->numDims = 2;
+        descriptorPtr3D->offsetInBytes = 0;
+        descriptorPtr3D->dataType = RpptDataType::F32;
+        descriptorPtr3D->dims[0] = batchSize;
+        descriptorPtr3D->dims[1] = maxSrcWidth;
+        descriptorPtr3D->strides[0] = descriptorPtr3D->dims[1];
+    }
+
     // set buffer sizes for src/dst
     iBufferSize = (Rpp64u)srcDescPtr->h * (Rpp64u)srcDescPtr->w * (Rpp64u)srcDescPtr->c * (Rpp64u)srcDescPtr->n;
     oBufferSize = (Rpp64u)dstDescPtr->h * (Rpp64u)dstDescPtr->w * (Rpp64u)dstDescPtr->c * (Rpp64u)dstDescPtr->n;
@@ -145,8 +158,8 @@ int main(int argc, char **argv)
         case 0:
         {
             testCaseName = "non_silent_region_detection";
-            Rpp32f detectedIndex[batchSize];
-            Rpp32f detectionLength[batchSize];
+            Rpp32s detectedIndex[batchSize];
+            Rpp32s detectionLength[batchSize];
             Rpp32f cutOffDB = -60.0;
             Rpp32s windowLength = 2048;
             Rpp32f referencePower = 0.0f;
@@ -265,6 +278,32 @@ int main(int argc, char **argv)
             break;
         }
 
+        case 5:
+        {
+            testCaseName = "slice";
+            Rpp32u nDim = 1; // testing for 1D slice
+            auto fillValue = 0;
+            bool enablePadding = true;
+            Rpp32u roiTensor[batchSize * nDim * 2];
+            Rpp32s anchorTensor[batchSize * nDim];
+            Rpp32s shapeTensor[batchSize * nDim];
+
+            // 1D slice arguments
+            for (int i = 0; i < batchSize; i++)
+            {
+                int idx = i * nDim * 2;
+                roiTensor[idx] = 10;
+                roiTensor[idx + 1] = srcLengthTensor[i];
+                anchorTensor[i] = 10;
+                shapeTensor[i] = dstDims[i].width = srcLengthTensor[i] / 2;
+                dstDims[i].height = 1;
+            }
+
+            startWallTime = omp_get_wtime();
+            rppt_slice_host(inputf32, descriptorPtr3D, outputf32, descriptorPtr3D, anchorTensor, shapeTensor, &fillValue, enablePadding, roiTensor, handle);
+
+            break;
+        }
         case 6:
         {
             testCaseName = "resample";
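In the 1D audio case the slice arguments collapse to one begin/length pair per clip. For an illustrative clip with srcLengthTensor[i] = 16000, the loop above yields:

    // roi    = {10, 16000}  -> valid source samples start at 10
    // anchor = 10, shape = 8000
    // output[0 .. 7999] = input[10 .. 8009]; anchor + shape stays inside the roi,
    // so fillValue is never consulted even though enablePadding is true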
diff --git a/utilities/test_suite/HOST/Tensor_voxel_host.cpp b/utilities/test_suite/HOST/Tensor_voxel_host.cpp
index 968f29b54..89fddf2c6 100644
--- a/utilities/test_suite/HOST/Tensor_voxel_host.cpp
+++ b/utilities/test_suite/HOST/Tensor_voxel_host.cpp
@@ -140,6 +140,10 @@ int main(int argc, char * argv[])
     void *pinnedMemArgs;
     pinnedMemArgs = calloc(2 * noOfFiles , sizeof(Rpp32f));
 
+    // arguments required for slice
+    Rpp32s *anchorTensor = NULL, *shapeTensor = NULL;
+    Rpp32u *roiTensor = NULL;
+
     // Set the number of threads to be used by OpenMP pragma for RPP batch processing on host.
     // If numThreads value passed is 0, number of OpenMP threads used by RPP will be set to batch size
     Rpp32u numThreads = 0;
@@ -241,11 +245,21 @@ int main(int argc, char * argv[])
         case 1:
         {
             testCaseName = "slice";
+            if(anchorTensor == NULL)
+                anchorTensor = static_cast<Rpp32s*>(calloc(batchSize * 4, sizeof(Rpp32s)));
+            if(shapeTensor == NULL)
+                shapeTensor = static_cast<Rpp32s*>(calloc(batchSize * 4, sizeof(Rpp32s)));
+            if(roiTensor == NULL)
+                roiTensor = static_cast<Rpp32u*>(calloc(batchSize * 8, sizeof(Rpp32u)));
+            bool enablePadding = false;
+            auto fillValue = 0;
+            init_slice_voxel(descriptorPtr3D, roiGenericSrcPtr, roiTensor, anchorTensor, shapeTensor);
+
             startWallTime = omp_get_wtime();
             if(inputBitDepth == 0)
-                rppt_slice_host(inputU8, descriptorPtr3D, outputU8, descriptorPtr3D, roiGenericSrcPtr, roiTypeSrc, handle);
+                rppt_slice_host(inputU8, descriptorPtr3D, outputU8, descriptorPtr3D, anchorTensor, shapeTensor, &fillValue, enablePadding, roiTensor, handle);
             else if(inputBitDepth == 2)
-                rppt_slice_host(inputF32, descriptorPtr3D, outputF32, descriptorPtr3D, roiGenericSrcPtr, roiTypeSrc, handle);
+                rppt_slice_host(inputF32, descriptorPtr3D, outputF32, descriptorPtr3D, anchorTensor, shapeTensor, &fillValue, enablePadding, roiTensor, handle);
             else
                 missingFuncFlag = 1;
 
@@ -393,6 +407,39 @@ int main(int argc, char * argv[])
             outputF32[i] = static_cast<Rpp32f>(outputU8[i]);
     }
 
+    // if test case is slice and qaFlag is set, update the ROI with shapeTensor values
+    // for output display and comparison purposes
+    if(testCase == 1)
+    {
+        // update the roi for comparison with the shapeTensor values
+        if (descriptorPtr3D->layout == RpptLayout::NCDHW)
+        {
+            for(int i = 0; i < batchSize; i++)
+            {
+                int idx1 = i * 4;
+                roiGenericSrcPtr[i].xyzwhdROI.xyz.x = 0;
+                roiGenericSrcPtr[i].xyzwhdROI.xyz.y = 0;
+                roiGenericSrcPtr[i].xyzwhdROI.xyz.z = 0;
+                roiGenericSrcPtr[i].xyzwhdROI.roiDepth = shapeTensor[idx1 + 1];
+                roiGenericSrcPtr[i].xyzwhdROI.roiHeight = shapeTensor[idx1 + 2];
+                roiGenericSrcPtr[i].xyzwhdROI.roiWidth = shapeTensor[idx1 + 3];
+            }
+        }
+        else if(descriptorPtr3D->layout == RpptLayout::NDHWC)
+        {
+            for(int i = 0; i < batchSize; i++)
+            {
+                int idx1 = i * 4;
+                roiGenericSrcPtr[i].xyzwhdROI.xyz.x = 0;
+                roiGenericSrcPtr[i].xyzwhdROI.xyz.y = 0;
+                roiGenericSrcPtr[i].xyzwhdROI.xyz.z = 0;
+                roiGenericSrcPtr[i].xyzwhdROI.roiDepth = shapeTensor[idx1];
+                roiGenericSrcPtr[i].xyzwhdROI.roiHeight = shapeTensor[idx1 + 1];
+                roiGenericSrcPtr[i].xyzwhdROI.roiWidth = shapeTensor[idx1 + 2];
+            }
+        }
+    }
+
     /*Compare the output of the function with golden outputs only if
     1.QA Flag is set
     2.input bit depth 2 (F32)*/
@@ -481,6 +528,12 @@ int main(int argc, char * argv[])
     free(outputF32);
     free(roiGenericSrcPtr);
     free(pinnedMemArgs);
+    if(anchorTensor != NULL)
+        free(anchorTensor);
+    if(shapeTensor != NULL)
+        free(shapeTensor);
+    if(roiTensor != NULL)
+        free(roiTensor);
     if(inputBitDepth == 0)
     {
         if(inputU8 != NULL)
diff --git a/utilities/test_suite/HOST/runTests.py b/utilities/test_suite/HOST/runTests.py
index ddde7db9c..8bedd5044 100644
--- a/utilities/test_suite/HOST/runTests.py
+++ b/utilities/test_suite/HOST/runTests.py
@@ -40,7 +40,7 @@
 outFolderPath = os.getcwd()
 buildFolderPath = os.getcwd()
 caseMin = 0
-caseMax = 89
+caseMax = 90
 
 # Get a list of log files based on a flag for preserving output
 def get_log_file_list(preserveOutput):
@@ -283,7 +283,7 @@ def rpp_test_suite_parser_and_validator():
     subprocess.run(["make", "-j16"], cwd=".")    # nosec
 
 # List of cases supported
-supportedCaseList = ['0', '1', '2', '4', '8', '13', '20', '21', '23', '29', '30', '31', '33', '34', '36', '37', '38', '39', '45', '46', '54', '61', '63', '65', '68', '70', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89']
+supportedCaseList = ['0', '1', '2', '4', '8', '13', '20', '21', '23', '29', '30', '31', '33', '34', '36', '37', '38', '39', '45', '46', '54', '61', '63', '65', '68', '70', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90']
 
 print("\n\n\n\n\n")
 print("##########################################################################################")
diff --git a/utilities/test_suite/REFERENCE_OUTPUT/slice/slice_u8_Tensor.bin b/utilities/test_suite/REFERENCE_OUTPUT/slice/slice_u8_Tensor.bin
new file mode 100644
index 000000000..ac379dc79
Binary files /dev/null and b/utilities/test_suite/REFERENCE_OUTPUT/slice/slice_u8_Tensor.bin differ
diff --git a/utilities/test_suite/REFERENCE_OUTPUTS_AUDIO/slice/slice.bin b/utilities/test_suite/REFERENCE_OUTPUTS_AUDIO/slice/slice.bin
new file mode 100644
index 000000000..10b45a6ff
Binary files /dev/null and b/utilities/test_suite/REFERENCE_OUTPUTS_AUDIO/slice/slice.bin differ
diff --git a/utilities/test_suite/REFERENCE_OUTPUT_VOXEL/slice/slice.bin b/utilities/test_suite/REFERENCE_OUTPUT_VOXEL/slice/slice.bin
deleted file mode 100644
index 94802b30b..000000000
Binary files a/utilities/test_suite/REFERENCE_OUTPUT_VOXEL/slice/slice.bin and /dev/null differ
diff --git a/utilities/test_suite/REFERENCE_OUTPUT_VOXEL/slice/slice_nifti_output.bin b/utilities/test_suite/REFERENCE_OUTPUT_VOXEL/slice/slice_nifti_output.bin
index 94802b30b..f51ec113c 100644
Binary files a/utilities/test_suite/REFERENCE_OUTPUT_VOXEL/slice/slice_nifti_output.bin and b/utilities/test_suite/REFERENCE_OUTPUT_VOXEL/slice/slice_nifti_output.bin differ
diff --git a/utilities/test_suite/rpp_test_suite_audio.h b/utilities/test_suite/rpp_test_suite_audio.h
index 4367df17c..8ff5815c0 100644
--- a/utilities/test_suite/rpp_test_suite_audio.h
+++ b/utilities/test_suite/rpp_test_suite_audio.h
@@ -41,6 +41,7 @@ std::map<int, string> audioAugmentationMap =
     {2, "pre_emphasis_filter"},
     {3, "down_mixing"},
     {4, "spectrogram"},
+    {5, "slice"},
     {6, "resample"}
 };
 
@@ -231,7 +232,7 @@ void verify_output(Rpp32f *dstPtr, RpptDescPtr dstDescPtr, RpptImagePatchPtr dst
     free(refOutput);
 }
 
-void verify_non_silent_region_detection(float *detectedIndex, float *detectionLength, string testCase, int bs, vector<string> audioNames, string dst)
+void verify_non_silent_region_detection(int *detectedIndex, int *detectionLength, string testCase, int bs, vector<string> audioNames, string dst)
 {
     int fileMatch = 0;
     for (int i = 0; i < bs; i++)
diff --git a/utilities/test_suite/rpp_test_suite_common.h b/utilities/test_suite/rpp_test_suite_common.h
index 3ec123d7f..ea3117041 100644
--- a/utilities/test_suite/rpp_test_suite_common.h
+++ b/utilities/test_suite/rpp_test_suite_common.h
@@ -106,6 +106,7 @@ std::map<int, string> augmentationMap =
     {87, "tensor_sum"},
     {88, "tensor_min"},
     {89, "tensor_max"},
+    {90, "slice"}
 };
 
 // Golden outputs for Tensor min Kernel
@@ -456,6 +457,43 @@ inline void set_generic_descriptor(RpptGenericDescPtr descriptorPtr3D, int noOfI
     descriptorPtr3D->strides[4] = 1;
 }
 
+// sets generic descriptor dimensions and strides of src/dst for slice functionality
+inline void set_generic_descriptor_slice(RpptDescPtr srcDescPtr, RpptGenericDescPtr descriptorPtr3D, int batchSize)
+{
+    descriptorPtr3D->offsetInBytes = 0;
+    descriptorPtr3D->dataType = srcDescPtr->dataType;
+    descriptorPtr3D->layout = srcDescPtr->layout;
+    if(srcDescPtr->c == 3)
+    {
+        descriptorPtr3D->numDims = 4;
+        descriptorPtr3D->dims[0] = batchSize;
+        if (srcDescPtr->layout == RpptLayout::NHWC)
+        {
+            descriptorPtr3D->dims[1] = srcDescPtr->h;
+            descriptorPtr3D->dims[2] = srcDescPtr->w;
+            descriptorPtr3D->dims[3] = srcDescPtr->c;
+        }
+        else
+        {
+            descriptorPtr3D->dims[1] = srcDescPtr->c;
+            descriptorPtr3D->dims[2] = srcDescPtr->h;
+            descriptorPtr3D->dims[3] = srcDescPtr->w;
+        }
+        descriptorPtr3D->strides[0] = descriptorPtr3D->dims[1] * descriptorPtr3D->dims[2] * descriptorPtr3D->dims[3];
+        descriptorPtr3D->strides[1] = descriptorPtr3D->dims[2] * descriptorPtr3D->dims[3];
+        descriptorPtr3D->strides[2] = descriptorPtr3D->dims[3];
+    }
+    else
+    {
+        descriptorPtr3D->numDims = 3;
+        descriptorPtr3D->dims[0] = batchSize;
+        descriptorPtr3D->dims[1] = srcDescPtr->h;
+        descriptorPtr3D->dims[2] = srcDescPtr->w;
+        descriptorPtr3D->strides[0] = descriptorPtr3D->dims[1] * descriptorPtr3D->dims[2];
+        descriptorPtr3D->strides[1] = descriptorPtr3D->dims[2];
+    }
+}
+
 // sets descriptor dimensions and strides of src/dst
 inline void set_descriptor_dims_and_strides(RpptDescPtr descPtr, int noOfImages, int maxHeight, int maxWidth, int numChannels, int offsetInBytes)
 {
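The stride computation in set_generic_descriptor_slice is plain row-major arithmetic. For a 224x224 RGB source described as NCHW, for instance, it yields (sizes illustrative):

    // dims    = {batchSize, 3, 224, 224}
    // strides = {3 * 224 * 224, 224 * 224, 224}   // elements to step per unit of N, C, and H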
@@ -1295,4 +1333,60 @@ void inline init_ricap(int width, int height, int batchSize, Rpp32u *permutation
     roiPtrInputCropRegion[1].xywhROI = {randrange(0, part0Width - 8), randrange(0, height - part0Height), width - part0Width, part0Height};
     roiPtrInputCropRegion[2].xywhROI = {randrange(0, width - part0Width - 8), randrange(0, part0Height), part0Width, height - part0Height};
     roiPtrInputCropRegion[3].xywhROI = {randrange(0, part0Width - 8), randrange(0, part0Height), width - part0Width, height - part0Height};
-}
\ No newline at end of file
+}
+
+// initialize the roi, anchor and shape values required for slice
+void init_slice(RpptGenericDescPtr descriptorPtr3D, RpptROIPtr roiPtrSrc, Rpp32u *roiTensor, Rpp32s *anchorTensor, Rpp32s *shapeTensor)
+{
+    if(descriptorPtr3D->numDims == 4)
+    {
+        if (descriptorPtr3D->layout == RpptLayout::NCHW)
+        {
+            for(int i = 0; i < descriptorPtr3D->dims[0]; i++)
+            {
+                int idx1 = i * 3;
+                int idx2 = i * 6;
+                roiTensor[idx2] = anchorTensor[idx1] = 0;
+                roiTensor[idx2 + 1] = anchorTensor[idx1 + 1] = roiPtrSrc[i].xywhROI.xy.y;
+                roiTensor[idx2 + 2] = anchorTensor[idx1 + 2] = roiPtrSrc[i].xywhROI.xy.x;
+                roiTensor[idx2 + 3] = descriptorPtr3D->dims[1];
+                roiTensor[idx2 + 4] = roiPtrSrc[i].xywhROI.roiHeight;
+                roiTensor[idx2 + 5] = roiPtrSrc[i].xywhROI.roiWidth;
+                shapeTensor[idx1] = roiTensor[idx2 + 3];
+                shapeTensor[idx1 + 1] = roiTensor[idx2 + 4] / 2;
+                shapeTensor[idx1 + 2] = roiTensor[idx2 + 5] / 2;
+            }
+        }
+        else if(descriptorPtr3D->layout == RpptLayout::NHWC)
+        {
+            for(int i = 0; i < descriptorPtr3D->dims[0]; i++)
+            {
+                int idx1 = i * 3;
+                int idx2 = i * 6;
+                roiTensor[idx2] = anchorTensor[idx1] = roiPtrSrc[i].xywhROI.xy.y;
+                roiTensor[idx2 + 1] = anchorTensor[idx1 + 1] = roiPtrSrc[i].xywhROI.xy.x;
+                roiTensor[idx2 + 2] = anchorTensor[idx1 + 2] = 0;
+                roiTensor[idx2 + 3] = roiPtrSrc[i].xywhROI.roiHeight;
+                roiTensor[idx2 + 4] = roiPtrSrc[i].xywhROI.roiWidth;
+                roiTensor[idx2 + 5] = descriptorPtr3D->dims[3];
+                shapeTensor[idx1] = roiTensor[idx2 + 3] / 2;
+                shapeTensor[idx1 + 1] = roiTensor[idx2 + 4] / 2;
+                shapeTensor[idx1 + 2] = roiTensor[idx2 + 5];
+            }
+        }
+    }
+    if(descriptorPtr3D->numDims == 3)
+    {
+        for(int i = 0; i < descriptorPtr3D->dims[0]; i++)
+        {
+            int idx1 = i * 2;
+            int idx2 = i * 4;
+            roiTensor[idx2] = anchorTensor[idx1] = roiPtrSrc[i].xywhROI.xy.y;
+            roiTensor[idx2 + 1] = anchorTensor[idx1 + 1] = roiPtrSrc[i].xywhROI.xy.x;
+            roiTensor[idx2 + 2] = roiPtrSrc[i].xywhROI.roiHeight;
+            roiTensor[idx2 + 3] = roiPtrSrc[i].xywhROI.roiWidth;
+            shapeTensor[idx1] = roiTensor[idx2 + 2] / 2;
+            shapeTensor[idx1 + 1] = roiTensor[idx2 + 3] / 2;
+        }
+    }
+}
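As a concrete check of the indexing in init_slice: for image i = 0 in an NCHW batch whose source ROI is a full 3x150x200 region (x = 0, y = 0; sizes illustrative), the function writes:

    // roiTensor[0..5]    = {0, 0, 0, 3, 150, 200}   // begins (c, y, x), then lengths (c, h, w)
    // anchorTensor[0..2] = {0, 0, 0}
    // shapeTensor[0..2]  = {3, 75, 100}             // channels kept, height and width halved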
diff --git a/utilities/test_suite/rpp_test_suite_voxel.h b/utilities/test_suite/rpp_test_suite_voxel.h
index c24fc29a8..77de34f3b 100644
--- a/utilities/test_suite/rpp_test_suite_voxel.h
+++ b/utilities/test_suite/rpp_test_suite_voxel.h
@@ -242,6 +242,51 @@ inline string set_function_type(int layoutType, int pln1OutTypeCase, int outputF
     return funcType;
 }
 
+// initialize the roi, anchor and shape values required for slice
+void init_slice_voxel(RpptGenericDescPtr descriptorPtr3D, RpptROI3D *roiGenericSrcPtr, Rpp32u *roiTensor, Rpp32s *anchorTensor, Rpp32s *shapeTensor)
+{
+    if (descriptorPtr3D->layout == RpptLayout::NCDHW)
+    {
+        for(int i = 0; i < descriptorPtr3D->dims[0]; i++)
+        {
+            int idx1 = i * 4;
+            int idx2 = i * 8;
+            roiTensor[idx2] = anchorTensor[idx1] = 0;
+            roiTensor[idx2 + 1] = anchorTensor[idx1 + 1] = roiGenericSrcPtr[i].xyzwhdROI.xyz.z;
+            roiTensor[idx2 + 2] = anchorTensor[idx1 + 2] = roiGenericSrcPtr[i].xyzwhdROI.xyz.y;
+            roiTensor[idx2 + 3] = anchorTensor[idx1 + 3] = roiGenericSrcPtr[i].xyzwhdROI.xyz.x;
+            roiTensor[idx2 + 4] = descriptorPtr3D->dims[1];
+            roiTensor[idx2 + 5] = roiGenericSrcPtr[i].xyzwhdROI.roiDepth;
+            roiTensor[idx2 + 6] = roiGenericSrcPtr[i].xyzwhdROI.roiHeight;
+            roiTensor[idx2 + 7] = roiGenericSrcPtr[i].xyzwhdROI.roiWidth;
+            shapeTensor[idx1] = roiTensor[idx2 + 4];
+            shapeTensor[idx1 + 1] = roiTensor[idx2 + 5] / 2;
+            shapeTensor[idx1 + 2] = roiTensor[idx2 + 6] / 2;
+            shapeTensor[idx1 + 3] = roiTensor[idx2 + 7] / 2;
+        }
+    }
+    else if(descriptorPtr3D->layout == RpptLayout::NDHWC)
+    {
+        for(int i = 0; i < descriptorPtr3D->dims[0]; i++)
+        {
+            int idx1 = i * 4;
+            int idx2 = i * 8;
+            roiTensor[idx2] = anchorTensor[idx1] = roiGenericSrcPtr[i].xyzwhdROI.xyz.z;
+            roiTensor[idx2 + 1] = anchorTensor[idx1 + 1] = roiGenericSrcPtr[i].xyzwhdROI.xyz.y;
+            roiTensor[idx2 + 2] = anchorTensor[idx1 + 2] = roiGenericSrcPtr[i].xyzwhdROI.xyz.x;
+            roiTensor[idx2 + 3] = anchorTensor[idx1 + 3] = 0;
+            roiTensor[idx2 + 4] = roiGenericSrcPtr[i].xyzwhdROI.roiDepth;
+            roiTensor[idx2 + 5] = roiGenericSrcPtr[i].xyzwhdROI.roiHeight;
+            roiTensor[idx2 + 6] = roiGenericSrcPtr[i].xyzwhdROI.roiWidth;
+            roiTensor[idx2 + 7] = descriptorPtr3D->dims[4];
+            shapeTensor[idx1] = roiTensor[idx2 + 4] / 2;
+            shapeTensor[idx1 + 1] = roiTensor[idx2 + 5] / 2;
+            shapeTensor[idx1 + 2] = roiTensor[idx2 + 6] / 2;
+            shapeTensor[idx1 + 3] = roiTensor[idx2 + 7];
+        }
+    }
+}
+
 // reads nifti-1 header file
 static int read_nifti_header_file(char* const header_file, nifti_1_header &niftiHeader)
 {
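The NDHWC branch differs from NCDHW only in where the channel slot sits: channels come last, their anchor is pinned to 0, and their shape entry keeps the full channel count while D/H/W are halved. For one sample with an illustrative 64x128x128x2 (DHWC) source ROI:

    // roiTensor[0..7]   = {0, 0, 0, 0, 64, 128, 128, 2}   // begins (z, y, x, c), then lengths (d, h, w, c)
    // shapeTensor[0..3] = {32, 64, 64, 2}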