From 6e340d0957f0c99454a168803c022821895c868a Mon Sep 17 00:00:00 2001 From: zhoushunjie Date: Fri, 12 Aug 2022 13:04:27 +0800 Subject: [PATCH 1/2] Add argmax argmin function --- csrc/fastdeploy/function/reduce.cc | 148 ++++++++++++++++++++++++++++- csrc/fastdeploy/function/reduce.h | 28 ++++++ csrc/fastdeploy/utils/utils.h | 20 ++++ 3 files changed, 193 insertions(+), 3 deletions(-) diff --git a/csrc/fastdeploy/function/reduce.cc b/csrc/fastdeploy/function/reduce.cc index 6aeedf488cc..bd4870d0679 100644 --- a/csrc/fastdeploy/function/reduce.cc +++ b/csrc/fastdeploy/function/reduce.cc @@ -14,6 +14,7 @@ #include "fastdeploy/function/reduce.h" +#include #include #include "fastdeploy/function/eigen.h" @@ -215,9 +216,133 @@ void Reduce(const FDTensor& x, FDTensor* out, const std::vector& dims, } reduce_all = (reduce_all || full_dim); - FD_VISIT_ALL_TYPES(x.dtype, "ReduceKernelImpl", ([&] { - ReduceKernelImpl(x, out, dims, keep_dim, - reduce_all); + FD_VISIT_INT_FLOAT_TYPES(x.dtype, "ReduceKernelImpl", ([&] { + ReduceKernelImpl( + x, out, dims, keep_dim, reduce_all); + })); +} + +enum ArgMinMaxType { kArgMin, kArgMax }; + +template +struct ArgMinMaxFunctor {}; + +#define DECLARE_ARG_MIN_MAX_FUNCTOR(eigen_op_type, enum_argminmax_value) \ + template \ + struct ArgMinMaxFunctor { \ + void operator()(const FDTensor& in, FDTensor* out, \ + const std::vector& x_dims, int64_t axis, \ + bool keepdims) { \ + const auto& dev = *EigenDeviceWrapper::GetInstance()->GetDevice(); \ + auto in_eigen = EigenTensor::From(in, x_dims); \ + if (keepdims) { \ + auto out_eigen = EigenTensor::From(*out); \ + out_eigen.device(dev) = \ + in_eigen.eigen_op_type(axis).template cast(); \ + } else { \ + auto out_eigen = EigenTensor::From(*out); \ + out_eigen.device(dev) = \ + in_eigen.eigen_op_type(axis).template cast(); \ + } \ + } \ + } + +DECLARE_ARG_MIN_MAX_FUNCTOR(argmin, ArgMinMaxType::kArgMin); +DECLARE_ARG_MIN_MAX_FUNCTOR(argmax, ArgMinMaxType::kArgMax); + +template +void 
ArgMinMaxKernel(const FDTensor& x, FDTensor* out, int64_t axis, + bool keepdims, bool flatten) { + bool new_keepdims = keepdims | flatten; + // if flatten, will construct the new dims for the calculate + std::vector x_dims; + int new_axis = axis; + if (flatten) { + x_dims = {x.Numel()}; + // if flatten, the axis is fixed to 0 + new_axis = 0; + } else { + x_dims = x.shape; + if (axis < 0) new_axis = axis + x_dims.size(); + } +#define CALL_ARG_MINMAX_FUNCTOR(rank) \ + ArgMinMaxFunctor functor##rank; \ + functor##rank(x, out, x_dims, new_axis, new_keepdims) + + switch (x_dims.size()) { + case 1: + CALL_ARG_MINMAX_FUNCTOR(1); + break; + case 2: + CALL_ARG_MINMAX_FUNCTOR(2); + break; + case 3: + CALL_ARG_MINMAX_FUNCTOR(3); + break; + case 4: + CALL_ARG_MINMAX_FUNCTOR(4); + break; + case 5: + CALL_ARG_MINMAX_FUNCTOR(5); + break; + case 6: + CALL_ARG_MINMAX_FUNCTOR(6); + break; + default: + FDASSERT(x_dims.size() <= 6, + "%s operator doesn't support tensors whose ranks are greater " + "than 6.", + (EnumArgMinMaxValue == kArgMin ? 
"argmin" : "argmax")); + break; +#undef CALL_ARG_MINMAX_FUNCTOR + } +} + +template +void ArgMinMax(const FDTensor& x, FDTensor* out, int64_t axis, + FDDataType output_dtype, bool keepdims, bool flatten) { + const auto& x_dims = x.shape; + FDASSERT(axis >= -x_dims.size(), + "'axis'(%d) must be greater than or equal to -Rank(X)(%d).", axis, + -x_dims.size()); + FDASSERT(axis < x_dims.size(), + "'axis'(%d) must be less than or equal to Rank(X)(%d).", axis, + x_dims.size()); + FDASSERT(output_dtype == FDDataType::INT32 || FDDataType::INT64, + "The attribute of dtype in argmin/argmax must be [%s] or [%s], but " + "received [%s].", + Str(FDDataType::INT32), Str(FDDataType::INT64), Str(output_dtype)); + auto x_rank = x_dims.size(); + if (axis < 0) axis += x_rank; + if (output_dtype == FDDataType::INT32) { + int64_t all_element_num = 0; + if (flatten) { + all_element_num = x.Numel(); + + } else { + all_element_num = x_dims[axis]; + } + FDASSERT(all_element_num <= std::numeric_limits::max(), + "The element num of the argmin/argmax input at axis is " + "%d, is larger than int32 maximum value:%d, you must " + "set the dtype of argmin/argmax to 'int64'.", + all_element_num, std::numeric_limits::max()); + } + std::vector vec; + if (flatten) { + vec.emplace_back(static_cast(1)); + } else { + for (int64_t i = 0; i < axis; i++) vec.emplace_back(x_dims[i]); + if (keepdims) { + vec.emplace_back(static_cast(1)); + } + for (int64_t i = axis + 1; i < x_rank; i++) vec.emplace_back(x_dims[i]); + } + out->Allocate(vec, output_dtype); + + FD_VISIT_INT_TYPES(output_dtype, "ArgMinMaxKernel", ([&] { + ArgMinMaxKernel( + x, out, axis, keepdims, flatten); })); } @@ -255,6 +380,23 @@ void Prod(const FDTensor& x, FDTensor* out, const std::vector& dims, bool keep_dim, bool reduce_all) { Reduce(x, out, dims, keep_dim, reduce_all); } + +void ArgMax(const FDTensor& x, FDTensor* out, int64_t axis, + FDDataType output_dtype, bool keep_dim, bool flatten) { + FD_VISIT_INT_FLOAT_TYPES(x.dtype, 
"ArgMaxKernel", ([&] { + ArgMinMax( + x, out, axis, output_dtype, keep_dim, flatten); + })); +} + +void ArgMin(const FDTensor& x, FDTensor* out, int64_t axis, + FDDataType output_dtype, bool keep_dim, bool flatten) { + FD_VISIT_INT_FLOAT_TYPES(x.dtype, "ArgMinKernel", ([&] { + ArgMinMax( + x, out, axis, output_dtype, keep_dim, flatten); + })); +} + #endif } // namespace fastdeploy \ No newline at end of file diff --git a/csrc/fastdeploy/function/reduce.h b/csrc/fastdeploy/function/reduce.h index af8810c6b3b..3d77ed30f31 100644 --- a/csrc/fastdeploy/function/reduce.h +++ b/csrc/fastdeploy/function/reduce.h @@ -96,5 +96,33 @@ FASTDEPLOY_DECL void Prod(const FDTensor& x, FDTensor* out, const std::vector& dims, bool keep_dim = false, bool reduce_all = false); +/** Execute the argmax operation for input FDTensor along given dims. + @param x The input tensor. + @param out The output tensor which stores the result. + @param axis The axis which will be reduced. + @param output_dtype The data type of output FDTensor, INT64 or INT32, + default to INT64. + @param keep_dim Whether to keep the reduced dims, default false. + @param flatten Whether to flatten FDTensor to get the argmax index, default + false. +*/ +FASTDEPLOY_DECL void ArgMax(const FDTensor& x, FDTensor* out, int64_t axis, + FDDataType output_dtype = FDDataType::INT64, + bool keep_dim = false, bool flatten = false); + +/** Execute the argmin operation for input FDTensor along given dims. + @param x The input tensor. + @param out The output tensor which stores the result. + @param axis The axis which will be reduced. + @param output_dtype The data type of output FDTensor, INT64 or INT32, + default to INT64. + @param keep_dim Whether to keep the reduced dims, default false. + @param flatten Whether to flatten FDTensor to get the argmin index, default + false. 
+*/ +FASTDEPLOY_DECL void ArgMin(const FDTensor& x, FDTensor* out, int64_t axis, + FDDataType output_dtype = FDDataType::INT64, + bool keep_dim = false, bool flatten = false); + #endif } // namespace fastdeploy diff --git a/csrc/fastdeploy/utils/utils.h b/csrc/fastdeploy/utils/utils.h index dbac35fb6bf..96f5c64cdfc 100644 --- a/csrc/fastdeploy/utils/utils.h +++ b/csrc/fastdeploy/utils/utils.h @@ -132,6 +132,26 @@ FASTDEPLOY_DECL bool ReadBinaryFromFile(const std::string& file, } \ }() +#define FD_VISIT_INT_FLOAT_TYPES(TYPE, NAME, ...) \ + [&] { \ + const auto& __dtype__ = TYPE; \ + switch (__dtype__) { \ + FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT32, int32_t, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT64, int64_t, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP32, float, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP64, double, \ + __VA_ARGS__) \ + default: \ + FDASSERT(false, \ + "Invalid enum data type. Expect to accept data type INT32, " \ + "INT64, FP32, FP64, but receive type %s.", \ + Str(__dtype__)); \ + } \ + }() + #define FD_VISIT_FLOAT_TYPES(TYPE, NAME, ...) 
\ [&] { \ const auto& __dtype__ = TYPE; \ From cab6646cd2d1e8cd232bdfe5d12b5fa54390cfd2 Mon Sep 17 00:00:00 2001 From: zhoushunjie Date: Fri, 12 Aug 2022 18:49:26 +0800 Subject: [PATCH 2/2] Add unittest for argmax, argmin --- csrc/fastdeploy/function/reduce.cc | 26 +++++++----- tests/function/test_reduce.cc | 67 ++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+), 10 deletions(-) diff --git a/csrc/fastdeploy/function/reduce.cc b/csrc/fastdeploy/function/reduce.cc index bd4870d0679..388a6be33b2 100644 --- a/csrc/fastdeploy/function/reduce.cc +++ b/csrc/fastdeploy/function/reduce.cc @@ -232,13 +232,19 @@ struct ArgMinMaxFunctor {}; struct ArgMinMaxFunctor { \ void operator()(const FDTensor& in, FDTensor* out, \ const std::vector& x_dims, int64_t axis, \ - bool keepdims) { \ + bool keepdims, bool flatten) { \ const auto& dev = *EigenDeviceWrapper::GetInstance()->GetDevice(); \ auto in_eigen = EigenTensor::From(in, x_dims); \ if (keepdims) { \ - auto out_eigen = EigenTensor::From(*out); \ - out_eigen.device(dev) = \ - in_eigen.eigen_op_type(axis).template cast(); \ + if (!flatten) { \ + auto out_eigen = EigenTensor::From(*out); \ + out_eigen.device(dev) = \ + in_eigen.eigen_op_type(axis).template cast(); \ + } else { \ + auto out_eigen = EigenScalar::From(*out); \ + out_eigen.device(dev) = \ + in_eigen.eigen_op_type(axis).template cast(); \ + } \ } else { \ auto out_eigen = EigenTensor::From(*out); \ out_eigen.device(dev) = \ @@ -267,7 +273,7 @@ void ArgMinMaxKernel(const FDTensor& x, FDTensor* out, int64_t axis, } #define CALL_ARG_MINMAX_FUNCTOR(rank) \ ArgMinMaxFunctor functor##rank; \ - functor##rank(x, out, x_dims, new_axis, new_keepdims) + functor##rank(x, out, x_dims, new_axis, new_keepdims, flatten) switch (x_dims.size()) { case 1: @@ -302,17 +308,17 @@ template void ArgMinMax(const FDTensor& x, FDTensor* out, int64_t axis, FDDataType output_dtype, bool keepdims, bool flatten) { const auto& x_dims = x.shape; - FDASSERT(axis >= -x_dims.size(), + 
int64_t x_rank = x_dims.size(); + FDASSERT(axis >= -x_rank, "'axis'(%d) must be greater than or equal to -Rank(X)(%d).", axis, - -x_dims.size()); + -x_rank); - FDASSERT(axis < x_dims.size(), + FDASSERT(axis < x_rank, "'axis'(%d) must be less than or equal to Rank(X)(%d).", axis, - x_dims.size()); + x_rank); - FDASSERT(output_dtype == FDDataType::INT32 || FDDataType::INT64, + FDASSERT(output_dtype == FDDataType::INT32 || output_dtype == FDDataType::INT64, "The attribute of dtype in argmin/argmax must be [%s] or [%s], but " "received [%s].", Str(FDDataType::INT32), Str(FDDataType::INT64), Str(output_dtype)); - auto x_rank = x_dims.size(); if (axis < 0) axis += x_rank; if (output_dtype == FDDataType::INT32) { int64_t all_element_num = 0; diff --git a/tests/function/test_reduce.cc b/tests/function/test_reduce.cc index 5b16e53b452..91de87fa65b 100644 --- a/tests/function/test_reduce.cc +++ b/tests/function/test_reduce.cc @@ -305,5 +305,72 @@ TEST(fastdeploy, reduce_any) { check_data(reinterpret_cast(output.Data()), expected_result_noaxis.data(), expected_result_noaxis.size()); } + +TEST(fastdeploy, reduce_argmax) { + FDTensor input, output; + CheckShape check_shape; + CheckData check_data; + + std::vector inputs = {2, 4, 3, 7, 1, 5}; + std::vector expected_result_axis0 = {1, 0, 1}; + std::vector expected_result_axis1 = {1, 0}; + std::vector expected_result_noaxis = {3}; + input.SetExternalData({2, 3}, FDDataType::INT32, inputs.data()); + + // axis = 0, output_dtype = FDDataType::INT64, keep_dim = false, flatten = + // false + ArgMax(input, &output, 0); + check_shape(output.shape, {3}); + check_data(reinterpret_cast(output.Data()), + expected_result_axis0.data(), expected_result_axis0.size()); + + // axis = -1, output_dtype = FDDataType::INT64, keep_dim = false, flatten = + // false + ArgMax(input, &output, -1); + check_shape(output.shape, {2}); + check_data(reinterpret_cast(output.Data()), + expected_result_axis1.data(), expected_result_axis1.size()); + + // axis = -1, output_dtype = FDDataType::INT64, keep_dim = false, flatten = + // true + 
ArgMax(input, &output, -1, FDDataType::INT64, false, true); + check_shape(output.shape, {1}); + check_data(reinterpret_cast(output.Data()), + expected_result_noaxis.data(), expected_result_noaxis.size()); +} + +TEST(fastdeploy, reduce_argmin) { + FDTensor input, output; + CheckShape check_shape; + CheckData check_data; + + std::vector inputs = {2, 4, 3, 7, 1, 5}; + std::vector expected_result_axis0 = {0, 1, 0}; + std::vector expected_result_axis1 = {0, 1}; + std::vector expected_result_noaxis = {4}; + input.SetExternalData({2, 3}, FDDataType::INT32, inputs.data()); + + // axis = 0, output_dtype = FDDataType::INT64, keep_dim = false, flatten = + // false + ArgMin(input, &output, 0); + check_shape(output.shape, {3}); + check_data(reinterpret_cast(output.Data()), + expected_result_axis0.data(), expected_result_axis0.size()); + + // axis = -1, output_dtype = FDDataType::INT64, keep_dim = false, flatten = + // false + ArgMin(input, &output, -1); + check_shape(output.shape, {2}); + check_data(reinterpret_cast(output.Data()), + expected_result_axis1.data(), expected_result_axis1.size()); + + // axis = -1, output_dtype = FDDataType::INT64, keep_dim = false, flatten = + // true + ArgMin(input, &output, -1, FDDataType::INT64, false, true); + check_shape(output.shape, {1}); + check_data(reinterpret_cast(output.Data()), + expected_result_noaxis.data(), expected_result_noaxis.size()); +} + #endif } // namespace fastdeploy