diff --git a/doc/source/getting_started/comparison/comparison_with_r.rst b/doc/source/getting_started/comparison/comparison_with_r.rst index 767779b0f58a8..25ba237e8caf3 100644 --- a/doc/source/getting_started/comparison/comparison_with_r.rst +++ b/doc/source/getting_started/comparison/comparison_with_r.rst @@ -246,7 +246,7 @@ In pandas we may use :meth:`~pandas.pivot_table` method to handle this: } ) - baseball.pivot_table(values="batting avg", columns="team", aggfunc=np.max) + baseball.pivot_table(values="batting avg", columns="team", aggfunc="max") For more details and examples see :ref:`the reshaping documentation `. @@ -359,7 +359,7 @@ In pandas the equivalent expression, using the ) grouped = df.groupby(["month", "week"]) - grouped["x"].agg([np.mean, np.std]) + grouped["x"].agg(["mean", "std"]) For more details and examples see :ref:`the groupby documentation @@ -482,7 +482,7 @@ In Python the best way is to make use of :meth:`~pandas.pivot_table`: values="value", index=["variable", "week"], columns=["month"], - aggfunc=np.mean, + aggfunc="mean", ) Similarly for ``dcast`` which uses a data.frame called ``df`` in R to diff --git a/doc/source/getting_started/comparison/comparison_with_sql.rst b/doc/source/getting_started/comparison/comparison_with_sql.rst index a6d9d65e85645..7a83d50416186 100644 --- a/doc/source/getting_started/comparison/comparison_with_sql.rst +++ b/doc/source/getting_started/comparison/comparison_with_sql.rst @@ -198,7 +198,7 @@ to your grouped DataFrame, indicating which functions to apply to specific colum .. ipython:: python - tips.groupby("day").agg({"tip": np.mean, "day": np.size}) + tips.groupby("day").agg({"tip": "mean", "day": "size"}) Grouping by more than one column is done by passing a list of columns to the :meth:`~pandas.DataFrame.groupby` method. @@ -222,7 +222,7 @@ Grouping by more than one column is done by passing a list of columns to the .. 
ipython:: python - tips.groupby(["smoker", "day"]).agg({"tip": [np.size, np.mean]}) + tips.groupby(["smoker", "day"]).agg({"tip": ["size", "mean"]}) .. _compare_with_sql.join: diff --git a/doc/source/user_guide/basics.rst b/doc/source/user_guide/basics.rst index 65892f01326e4..389a2d23c466d 100644 --- a/doc/source/user_guide/basics.rst +++ b/doc/source/user_guide/basics.rst @@ -881,8 +881,8 @@ statistics methods, takes an optional ``axis`` argument: .. ipython:: python - df.apply(np.mean) - df.apply(np.mean, axis=1) + df.apply(lambda x: np.mean(x)) + df.apply(lambda x: np.mean(x), axis=1) df.apply(lambda x: x.max() - x.min()) df.apply(np.cumsum) df.apply(np.exp) @@ -986,7 +986,7 @@ output: .. ipython:: python - tsdf.agg(np.sum) + tsdf.agg(lambda x: np.sum(x)) tsdf.agg("sum") diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst index c7278c604ca02..fd4f7cd1b83fe 100644 --- a/doc/source/user_guide/cookbook.rst +++ b/doc/source/user_guide/cookbook.rst @@ -530,7 +530,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to code_groups = df.groupby("code") - agg_n_sort_order = code_groups[["data"]].transform(sum).sort_values(by="data") + agg_n_sort_order = code_groups[["data"]].transform("sum").sort_values(by="data") sorted_df = df.loc[agg_n_sort_order.index] @@ -549,7 +549,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to return x.iloc[1] * 1.234 return pd.NaT - mhc = {"Mean": np.mean, "Max": np.max, "Custom": MyCust} + mhc = {"Mean": "mean", "Max": "max", "Custom": MyCust} ts.resample("5min").apply(mhc) ts @@ -685,7 +685,7 @@ The :ref:`Pivot ` docs. 
values=["Sales"], index=["Province"], columns=["City"], - aggfunc=np.sum, + aggfunc="sum", margins=True, ) table.stack("City") diff --git a/doc/source/user_guide/reshaping.rst b/doc/source/user_guide/reshaping.rst index 8d0f1048f6e77..4df6996c4f66b 100644 --- a/doc/source/user_guide/reshaping.rst +++ b/doc/source/user_guide/reshaping.rst @@ -402,12 +402,12 @@ We can produce pivot tables from this data very easily: .. ipython:: python pd.pivot_table(df, values="D", index=["A", "B"], columns=["C"]) - pd.pivot_table(df, values="D", index=["B"], columns=["A", "C"], aggfunc=np.sum) + pd.pivot_table(df, values="D", index=["B"], columns=["A", "C"], aggfunc="sum") pd.pivot_table( df, values=["D", "E"], index=["B"], columns=["A", "C"], - aggfunc=np.sum, + aggfunc="sum", ) The result object is a :class:`DataFrame` having potentially hierarchical indexes on the @@ -451,7 +451,7 @@ rows and columns: columns="C", values=["D", "E"], margins=True, - aggfunc=np.std + aggfunc="std" ) table @@ -552,7 +552,7 @@ each group defined by the first two :class:`Series`: .. ipython:: python - pd.crosstab(df["A"], df["B"], values=df["C"], aggfunc=np.sum) + pd.crosstab(df["A"], df["B"], values=df["C"], aggfunc="sum") Adding margins ~~~~~~~~~~~~~~ @@ -562,7 +562,7 @@ Finally, one can also add margins or normalize this output. .. ipython:: python pd.crosstab( - df["A"], df["B"], values=df["C"], aggfunc=np.sum, normalize=True, margins=True + df["A"], df["B"], values=df["C"], aggfunc="sum", normalize=True, margins=True ) .. _reshaping.tile: diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index fb1c37c1b9073..a0754ba0d2995 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -1801,14 +1801,14 @@ You can pass a list or dict of functions to do aggregation with, outputting a `` .. 
ipython:: python - r["A"].agg([np.sum, np.mean, np.std]) + r["A"].agg(["sum", "mean", "std"]) On a resampled ``DataFrame``, you can pass a list of functions to apply to each column, which produces an aggregated result with a hierarchical index: .. ipython:: python - r.agg([np.sum, np.mean]) + r.agg(["sum", "mean"]) By passing a dict to ``aggregate`` you can apply a different aggregation to the columns of a ``DataFrame``: @@ -1816,7 +1816,7 @@ columns of a ``DataFrame``: .. ipython:: python :okexcept: - r.agg({"A": np.sum, "B": lambda x: np.std(x, ddof=1)}) + r.agg({"A": "sum", "B": lambda x: np.std(x, ddof=1)}) The function names can also be strings. In order for a string to be valid it must be implemented on the resampled object: diff --git a/doc/source/user_guide/window.rst b/doc/source/user_guide/window.rst index ea80a2804256c..85e8762570736 100644 --- a/doc/source/user_guide/window.rst +++ b/doc/source/user_guide/window.rst @@ -140,7 +140,7 @@ of multiple aggregations applied to a window. .. ipython:: python df = pd.DataFrame({"A": range(5), "B": range(10, 15)}) - df.expanding().agg([np.sum, np.mean, np.std]) + df.expanding().agg(["sum", "mean", "std"]) .. 
_window.generic: diff --git a/doc/source/whatsnew/v0.14.0.rst b/doc/source/whatsnew/v0.14.0.rst index ef24f2645d992..f33ab3911f231 100644 --- a/doc/source/whatsnew/v0.14.0.rst +++ b/doc/source/whatsnew/v0.14.0.rst @@ -846,7 +846,7 @@ Enhancements df.pivot_table(values='Quantity', index=pd.Grouper(freq='M', key='Date'), columns=pd.Grouper(freq='M', key='PayDay'), - aggfunc=np.sum) + aggfunc="sum") - Arrays of strings can be wrapped to a specified width (``str.wrap``) (:issue:`6999`) - Add :meth:`~Series.nsmallest` and :meth:`Series.nlargest` methods to Series, See :ref:`the docs ` (:issue:`3960`) diff --git a/doc/source/whatsnew/v0.20.0.rst b/doc/source/whatsnew/v0.20.0.rst index 34a875f59e808..b4224785988e6 100644 --- a/doc/source/whatsnew/v0.20.0.rst +++ b/doc/source/whatsnew/v0.20.0.rst @@ -984,7 +984,7 @@ Previous behavior: 75% 3.750000 max 4.000000 - In [3]: df.groupby('A').agg([np.mean, np.std, np.min, np.max]) + In [3]: df.groupby('A').agg(["mean", "std", "min", "max"]) Out[3]: B mean std amin amax @@ -1000,7 +1000,7 @@ New behavior: df.groupby('A').describe() - df.groupby('A').agg([np.mean, np.std, np.min, np.max]) + df.groupby('A').agg(["mean", "std", "min", "max"]) .. _whatsnew_0200.api_breaking.rolling_pairwise: @@ -1163,7 +1163,7 @@ Previous behavior: .. code-block:: ipython - In [2]: df.pivot_table('col1', index=['col3', 'col2'], aggfunc=np.sum) + In [2]: df.pivot_table('col1', index=['col3', 'col2'], aggfunc="sum") Out[2]: col3 col2 1 C 3 @@ -1175,7 +1175,7 @@ New behavior: .. ipython:: python - df.pivot_table('col1', index=['col3', 'col2'], aggfunc=np.sum) + df.pivot_table('col1', index=['col3', 'col2'], aggfunc="sum") .. 
_whatsnew_0200.api: diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 7965d335d0aac..c0f169aa6251f 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -48,7 +48,7 @@ output columns when applying multiple aggregation functions to specific columns animals.groupby("kind").agg( min_height=pd.NamedAgg(column='height', aggfunc='min'), max_height=pd.NamedAgg(column='height', aggfunc='max'), - average_weight=pd.NamedAgg(column='weight', aggfunc=np.mean), + average_weight=pd.NamedAgg(column='weight', aggfunc="mean"), ) Pass the desired columns names as the ``**kwargs`` to ``.agg``. The values of ``**kwargs`` @@ -61,7 +61,7 @@ what the arguments to the function are, but plain tuples are accepted as well. animals.groupby("kind").agg( min_height=('height', 'min'), max_height=('height', 'max'), - average_weight=('weight', np.mean), + average_weight=('weight', 'mean'), ) Named aggregation is the recommended replacement for the deprecated "dict-of-dicts" diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index dc306471dbd3f..eb49c69ad7567 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -315,11 +315,11 @@ Deprecations - Deprecated option "mode.use_inf_as_na", convert inf entries to ``NaN`` before instead (:issue:`51684`) - Deprecated parameter ``obj`` in :meth:`GroupBy.get_group` (:issue:`53545`) - Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`) +- Deprecated replacing builtin and NumPy functions in ``.agg``, ``.apply``, and ``.transform``; use the corresponding string alias (e.g. 
``"sum"`` for ``sum`` or ``np.sum``) instead (:issue:`53425`) - Deprecated strings ``T``, ``t``, ``L`` and ``l`` denoting units in :func:`to_timedelta` (:issue:`52536`) - Deprecated the "method" and "limit" keywords on :meth:`Series.fillna`, :meth:`DataFrame.fillna`, :meth:`SeriesGroupBy.fillna`, :meth:`DataFrameGroupBy.fillna`, and :meth:`Resampler.fillna`, use ``obj.bfill()`` or ``obj.ffill()`` instead (:issue:`53394`) - Deprecated the ``method`` and ``limit`` keywords in :meth:`DataFrame.replace` and :meth:`Series.replace` (:issue:`33302`) - Deprecated values "pad", "ffill", "bfill", "backfill" for :meth:`Series.interpolate` and :meth:`DataFrame.interpolate`, use ``obj.ffill()`` or ``obj.bfill()`` instead (:issue:`53581`) -- .. --------------------------------------------------------------------------- .. _whatsnew_210.performance: diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 83a3b29bfd7f0..6af4557897a0d 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -170,6 +170,7 @@ def agg(self) -> DataFrame | Series | None: if callable(func): f = com.get_cython_func(func) if f and not args and not kwargs: + warn_alias_replacement(obj, func, f) return getattr(obj, f)() # caller can react @@ -280,6 +281,7 @@ def transform_str_or_callable(self, func) -> DataFrame | Series: if not args and not kwargs: f = com.get_cython_func(func) if f: + warn_alias_replacement(obj, func, f) return getattr(obj, f)() # Two possible ways to use a UDF - apply or call directly @@ -1695,3 +1697,23 @@ def validate_func_kwargs( no_arg_message = "Must provide 'func' or named aggregation **kwargs." raise TypeError(no_arg_message) return columns, func + + +def warn_alias_replacement( + obj: AggObjType, + func: Callable, + alias: str, +) -> None: + if alias.startswith("np."): + full_alias = alias + else: + full_alias = f"{type(obj).__name__}.{alias}" + alias = f"'{alias}'" + warnings.warn( + f"The provided callable {func} is currently using " + f"{full_alias}. 
In a future version of pandas, " + f"the provided callable will be used directly. To keep current " + f"behavior pass {alias} instead.", + category=FutureWarning, + stacklevel=find_stack_level(), + ) diff --git a/pandas/core/common.py b/pandas/core/common.py index ee8fe220698b5..9db03ac3ae571 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -565,6 +565,13 @@ def require_length_match(data, index: Index) -> None: builtins.min: np.minimum.reduce, } +# GH#53425: Only for deprecation +_builtin_table_alias = { + builtins.sum: "np.sum", + builtins.max: "np.maximum.reduce", + builtins.min: "np.minimum.reduce", +} + _cython_table = { builtins.sum: "sum", builtins.max: "max", diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f90b5c0eedbe8..6fdd6cb2a639e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8851,7 +8851,7 @@ def pivot( it can contain any of the other types (except list). If an array is passed, it must be the same length as the data and will be used in the same manner as column values. - aggfunc : function, list of functions, dict, default numpy.mean + aggfunc : function, list of functions, dict, default "mean" If a list of functions is passed, the resulting pivot table will have hierarchical columns whose top level are the function names (inferred from the function objects themselves). @@ -8926,7 +8926,7 @@ def pivot( This first example aggregates values by taking the sum. >>> table = pd.pivot_table(df, values='D', index=['A', 'B'], - ... columns=['C'], aggfunc=np.sum) + ... columns=['C'], aggfunc="sum") >>> table C large small A B @@ -8938,7 +8938,7 @@ def pivot( We can also fill missing values using the `fill_value` parameter. >>> table = pd.pivot_table(df, values='D', index=['A', 'B'], - ... columns=['C'], aggfunc=np.sum, fill_value=0) + ... columns=['C'], aggfunc="sum", fill_value=0) >>> table C large small A B @@ -8950,7 +8950,7 @@ def pivot( The next example aggregates by taking the mean across multiple columns. 
>>> table = pd.pivot_table(df, values=['D', 'E'], index=['A', 'C'], - ... aggfunc={'D': np.mean, 'E': np.mean}) + ... aggfunc={'D': "mean", 'E': "mean"}) >>> table D E A C @@ -8963,8 +8963,8 @@ def pivot( value column. >>> table = pd.pivot_table(df, values=['D', 'E'], index=['A', 'C'], - ... aggfunc={'D': np.mean, - ... 'E': [min, max, np.mean]}) + ... aggfunc={'D': "mean", + ... 'E': ["min", "max", "mean"]}) >>> table D E mean max mean min @@ -9565,7 +9565,7 @@ def _gotitem( Aggregate different functions over the columns and rename the index of the resulting DataFrame. - >>> df.agg(x=('A', max), y=('B', 'min'), z=('C', np.mean)) + >>> df.agg(x=('A', 'max'), y=('B', 'min'), z=('C', 'mean')) A B C x 7.0 NaN NaN y NaN 2.0 NaN diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index e3aa97b448fe1..3bedcb935b6ba 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -64,6 +64,7 @@ maybe_mangle_lambdas, reconstruct_func, validate_func_kwargs, + warn_alias_replacement, ) import pandas.core.common as com from pandas.core.frame import DataFrame @@ -133,7 +134,7 @@ class NamedAgg(NamedTuple): -------- >>> df = pd.DataFrame({"key": [1, 1, 2], "a": [-1, 0, 1], 1: [10, 11, 12]}) >>> agg_a = pd.NamedAgg(column="a", aggfunc="min") - >>> agg_1 = pd.NamedAgg(column=1, aggfunc=np.mean) + >>> agg_1 = pd.NamedAgg(column=1, aggfunc=lambda x: np.mean(x)) >>> df.groupby("key").agg(result_a=agg_a, result_1=agg_1) result_a result_1 key @@ -257,6 +258,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) else: cyfunc = com.get_cython_func(func) if cyfunc and not args and not kwargs: + warn_alias_replacement(self, func, cyfunc) return getattr(self, cyfunc)() if maybe_use_numba(engine): @@ -306,7 +308,11 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) agg = aggregate def _python_agg_general(self, func, *args, **kwargs): + orig_func = func func = com.is_builtin_func(func) 
+ if orig_func != func: + alias = com._builtin_table_alias[orig_func] + warn_alias_replacement(self, orig_func, alias) f = lambda x: func(x, *args, **kwargs) obj = self._obj_with_exclusions @@ -1511,7 +1517,11 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) agg = aggregate def _python_agg_general(self, func, *args, **kwargs): + orig_func = func func = com.is_builtin_func(func) + if orig_func != func: + alias = com._builtin_table_alias[orig_func] + warn_alias_replacement(self, orig_func, alias) f = lambda x: func(x, *args, **kwargs) if self.ngroups == 0: diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 2e3415f9a4474..ff9c1cf757f37 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -96,6 +96,7 @@ class providing the base-class of operations. sample, ) from pandas.core._numba import executor +from pandas.core.apply import warn_alias_replacement from pandas.core.arrays import ( BaseMaskedArray, Categorical, @@ -1677,7 +1678,11 @@ def _aggregate_with_numba(self, func, *args, engine_kwargs=None, **kwargs): ) ) def apply(self, func, *args, **kwargs) -> NDFrameT: + orig_func = func func = com.is_builtin_func(func) + if orig_func != func: + alias = com._builtin_table_alias[orig_func] + warn_alias_replacement(self, orig_func, alias) if isinstance(func, str): if hasattr(self, func): @@ -1880,7 +1885,10 @@ def _cython_transform( @final def _transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): # optimized transforms + orig_func = func func = com.get_cython_func(func) or func + if orig_func != func: + warn_alias_replacement(self, orig_func, func) if not isinstance(func, str): return self._transform_general(func, engine, engine_kwargs, *args, **kwargs) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 3916f90e1f0b2..c0a6587d527e1 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -41,7 +41,10 @@ ) import pandas.core.algorithms as
algos -from pandas.core.apply import ResamplerWindowApply +from pandas.core.apply import ( + ResamplerWindowApply, + warn_alias_replacement, +) from pandas.core.base import ( PandasObject, SelectionMixin, @@ -295,7 +298,7 @@ def pipe( >>> r = s.resample('2s') - >>> r.agg(np.sum) + >>> r.agg("sum") 2013-01-01 00:00:00 3 2013-01-01 00:00:02 7 2013-01-01 00:00:04 5 @@ -308,7 +311,7 @@ def pipe( 2013-01-01 00:00:04 5 5.0 5 >>> r.agg({'result': lambda x: x.mean() / x.std(), - ... 'total': np.sum}) + ... 'total': "sum"}) result total 2013-01-01 00:00:00 2.121320 3 2013-01-01 00:00:02 4.949747 7 @@ -1673,7 +1676,10 @@ def _downsample(self, how, **kwargs): how : string / cython mapped function **kwargs : kw args passed to how function """ + orig_how = how how = com.get_cython_func(how) or how + if orig_how != how: + warn_alias_replacement(self, orig_how, how) ax = self.ax # Excludes `on` column when provided @@ -1827,7 +1833,10 @@ def _downsample(self, how, **kwargs): if self.kind == "timestamp": return super()._downsample(how, **kwargs) + orig_how = how how = com.get_cython_func(how) or how + if orig_how != how: + warn_alias_replacement(self, orig_how, how) ax = self.ax if is_subperiod(ax.freq, self.freq): diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 5681167cd54f9..43f903f99d0d7 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -1608,11 +1608,13 @@ def foo2(x, b=2, c=0): def test_agg_std(): df = DataFrame(np.arange(6).reshape(3, 2), columns=["A", "B"]) - result = df.agg(np.std) + with tm.assert_produces_warning(FutureWarning, match="using DataFrame.std"): + result = df.agg(np.std) expected = Series({"A": 2.0, "B": 2.0}, dtype=float) tm.assert_series_equal(result, expected) - result = df.agg([np.std]) + with tm.assert_produces_warning(FutureWarning, match="using Series.std"): + result = df.agg([np.std]) expected = DataFrame({"A": 2.0, "B": 2.0}, index=["std"]) 
tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/apply/test_frame_apply_relabeling.py b/pandas/tests/apply/test_frame_apply_relabeling.py index 2652d43fd42ec..723bdd349c0cb 100644 --- a/pandas/tests/apply/test_frame_apply_relabeling.py +++ b/pandas/tests/apply/test_frame_apply_relabeling.py @@ -49,20 +49,24 @@ def test_agg_relabel_multi_columns_multi_methods(): def test_agg_relabel_partial_functions(): # GH 26513, test on partial, functools or more complex cases df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]}) - result = df.agg(foo=("A", np.mean), bar=("A", "mean"), cat=("A", min)) + msg = "using Series.[mean|min]" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.agg(foo=("A", np.mean), bar=("A", "mean"), cat=("A", min)) expected = pd.DataFrame( {"A": [1.5, 1.5, 1.0]}, index=pd.Index(["foo", "bar", "cat"]) ) tm.assert_frame_equal(result, expected) - result = df.agg( - foo=("A", min), - bar=("A", np.min), - cat=("B", max), - dat=("C", "min"), - f=("B", np.sum), - kk=("B", lambda x: min(x)), - ) + msg = "using Series.[mean|min|max|sum]" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.agg( + foo=("A", min), + bar=("A", np.min), + cat=("B", max), + dat=("C", "min"), + f=("B", np.sum), + kk=("B", lambda x: min(x)), + ) expected = pd.DataFrame( { "A": [1.0, 1.0, np.nan, np.nan, np.nan, np.nan], @@ -79,7 +83,7 @@ def test_agg_namedtuple(): df = pd.DataFrame({"A": [0, 1], "B": [1, 2]}) result = df.agg( foo=pd.NamedAgg("B", "sum"), - bar=pd.NamedAgg("B", min), + bar=pd.NamedAgg("B", "min"), cat=pd.NamedAgg(column="B", aggfunc="count"), fft=pd.NamedAgg("B", aggfunc="max"), ) diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py index 21b5c803d0e76..e0d52f094515b 100644 --- a/pandas/tests/apply/test_invalid_arg.py +++ b/pandas/tests/apply/test_invalid_arg.py @@ -222,8 +222,10 @@ def transform2(row): def test_agg_cython_table_raises_frame(df, 
func, expected, axis): # GH 21224 msg = "can't multiply sequence by non-int of type 'str'" + warn = None if isinstance(func, str) else FutureWarning with pytest.raises(expected, match=msg): - df.agg(func, axis=axis) + with tm.assert_produces_warning(warn, match="using DataFrame.cumprod"): + df.agg(func, axis=axis) @pytest.mark.parametrize( @@ -247,10 +249,12 @@ def test_agg_cython_table_raises_series(series, func, expected): msg = r"[Cc]ould not convert|can't multiply sequence by non-int of type" if func == "median" or func is np.nanmedian or func is np.median: msg = r"Cannot convert \['a' 'b' 'c'\] to numeric" + warn = None if isinstance(func, str) else FutureWarning with pytest.raises(expected, match=msg): # e.g. Series('a b'.split()).cumprod() will raise - series.agg(func) + with tm.assert_produces_warning(warn, match="is currently using Series.*"): + series.agg(func) def test_agg_none_to_type(): diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index 9002a5f85cba6..79954eeed8e95 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -561,7 +561,10 @@ def test_apply_listlike_reducer(string_series, ops, names, how, kwargs): # GH 39140 expected = Series({name: op(string_series) for name, op in zip(names, ops)}) expected.name = "series" - result = getattr(string_series, how)(ops, **kwargs) + warn = FutureWarning if how == "agg" else None + msg = f"using Series.[{'|'.join(names)}]" + with tm.assert_produces_warning(warn, match=msg): + result = getattr(string_series, how)(ops, **kwargs) tm.assert_series_equal(result, expected) @@ -582,7 +585,10 @@ def test_apply_dictlike_reducer(string_series, ops, how, kwargs, by_row): # GH 39140 expected = Series({name: op(string_series) for name, op in ops.items()}) expected.name = string_series.name - result = getattr(string_series, how)(ops, **kwargs) + warn = FutureWarning if how == "agg" else None + msg = "using Series.[sum|mean]" + 
with tm.assert_produces_warning(warn, match=msg): + result = getattr(string_series, how)(ops, **kwargs) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/apply/test_series_apply_relabeling.py b/pandas/tests/apply/test_series_apply_relabeling.py index c0a285e6eb38c..cdfa054f91c9b 100644 --- a/pandas/tests/apply/test_series_apply_relabeling.py +++ b/pandas/tests/apply/test_series_apply_relabeling.py @@ -14,8 +14,12 @@ def test_relabel_no_duplicated_method(): expected = df["B"].agg({"foo": "min", "bar": "max"}) tm.assert_series_equal(result, expected) - result = df["B"].agg(foo=sum, bar=min, cat="max") - expected = df["B"].agg({"foo": sum, "bar": min, "cat": "max"}) + msg = "using Series.[sum|min|max]" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df["B"].agg(foo=sum, bar=min, cat="max") + msg = "using Series.[sum|min|max]" + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = df["B"].agg({"foo": sum, "bar": min, "cat": "max"}) tm.assert_series_equal(result, expected) @@ -28,6 +32,8 @@ def test_relabel_duplicated_method(): expected = pd.Series([6, 6], index=["foo", "bar"], name="A") tm.assert_series_equal(result, expected) - result = df["B"].agg(foo=min, bar="min") + msg = "using Series.min" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df["B"].agg(foo=min, bar="min") expected = pd.Series([1, 1], index=["foo", "bar"], name="B") tm.assert_series_equal(result, expected) diff --git a/pandas/tests/apply/test_str.py b/pandas/tests/apply/test_str.py index 64189fae5f578..363d0285cabbc 100644 --- a/pandas/tests/apply/test_str.py +++ b/pandas/tests/apply/test_str.py @@ -135,7 +135,9 @@ def test_agg_cython_table_series(series, func, expected): # GH21224 # test reducing functions in # pandas.core.base.SelectionMixin._cython_table - result = series.agg(func) + warn = None if isinstance(func, str) else FutureWarning + with tm.assert_produces_warning(warn, match="is currently using Series.*"): 
+ result = series.agg(func) if is_number(expected): assert np.isclose(result, expected, equal_nan=True) else: @@ -168,7 +170,9 @@ def test_agg_cython_table_transform_series(series, func, expected): # GH21224 # test transforming functions in # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum) - result = series.agg(func) + warn = None if isinstance(func, str) else FutureWarning + with tm.assert_produces_warning(warn, match="is currently using Series.*"): + result = series.agg(func) tm.assert_series_equal(result, expected) @@ -211,7 +215,10 @@ def test_agg_cython_table_frame(df, func, expected, axis): # GH 21224 # test reducing functions in # pandas.core.base.SelectionMixin._cython_table - result = df.agg(func, axis=axis) + warn = None if isinstance(func, str) else FutureWarning + with tm.assert_produces_warning(warn, match="is currently using DataFrame.*"): + # GH#53425 + result = df.agg(func, axis=axis) tm.assert_series_equal(result, expected) @@ -238,7 +245,10 @@ def test_agg_cython_table_transform_frame(df, func, expected, axis): # operating blockwise doesn't let us preserve dtypes expected = expected.astype("float64") - result = df.agg(func, axis=axis) + warn = None if isinstance(func, str) else FutureWarning + with tm.assert_produces_warning(warn, match="is currently using DataFrame.*"): + # GH#53425 + result = df.agg(func, axis=axis) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 3558377907931..2875e1ae80501 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -40,7 +40,7 @@ def dummy_func(x): def test_agg_regression1(tsframe): grouped = tsframe.groupby([lambda x: x.year, lambda x: x.month]) - result = grouped.agg(np.mean) + result = grouped.agg("mean") expected = grouped.mean() tm.assert_frame_equal(result, expected) @@ -141,8 +141,8 @@ def test_agg_apply_corner(ts, 
tsframe): # groupby float64 values results in a float64 Index exp = Series([], dtype=np.float64, index=Index([], dtype=np.float64)) tm.assert_series_equal(grouped.sum(), exp) - tm.assert_series_equal(grouped.agg(np.sum), exp) - tm.assert_series_equal(grouped.apply(np.sum), exp, check_index_type=False) + tm.assert_series_equal(grouped.agg("sum"), exp) + tm.assert_series_equal(grouped.apply("sum"), exp, check_index_type=False) # DataFrame grouped = tsframe.groupby(tsframe["A"] * np.nan, group_keys=False) @@ -152,7 +152,7 @@ def test_agg_apply_corner(ts, tsframe): index=Index([], name="A", dtype=np.float64), ) tm.assert_frame_equal(grouped.sum(), exp_df) - tm.assert_frame_equal(grouped.agg(np.sum), exp_df) + tm.assert_frame_equal(grouped.agg("sum"), exp_df) msg = "The behavior of DataFrame.sum with axis=None is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False): @@ -167,13 +167,13 @@ def test_agg_grouping_is_list_tuple(ts): grouper = grouped.grouper.groupings[0].grouping_vector grouped.grouper.groupings[0] = Grouping(ts.index, list(grouper)) - result = grouped.agg(np.mean) + result = grouped.agg("mean") expected = grouped.mean() tm.assert_frame_equal(result, expected) grouped.grouper.groupings[0] = Grouping(ts.index, tuple(grouper)) - result = grouped.agg(np.mean) + result = grouped.agg("mean") expected = grouped.mean() tm.assert_frame_equal(result, expected) @@ -181,7 +181,7 @@ def test_agg_grouping_is_list_tuple(ts): def test_agg_python_multiindex(mframe): grouped = mframe.groupby(["A", "B"]) - result = grouped.agg(np.mean) + result = grouped.agg("mean") expected = grouped.mean() tm.assert_frame_equal(result, expected) @@ -348,7 +348,9 @@ def func(ser): def test_agg_multiple_functions_maintain_order(df): # GH #610 funcs = [("mean", np.mean), ("max", np.max), ("min", np.min)] - result = df.groupby("A")["C"].agg(funcs) + msg = "is currently using SeriesGroupBy.mean" + with tm.assert_produces_warning(FutureWarning, match=msg): 
+ result = df.groupby("A")["C"].agg(funcs) exp_cols = Index(["mean", "max", "min"]) tm.assert_index_equal(result.columns, exp_cols) @@ -428,20 +430,20 @@ def test_multiple_functions_tuples_and_non_tuples(df): def test_more_flexible_frame_multi_function(df): grouped = df.groupby("A") - exmean = grouped.agg({"C": np.mean, "D": np.mean}) - exstd = grouped.agg({"C": np.std, "D": np.std}) + exmean = grouped.agg({"C": "mean", "D": "mean"}) + exstd = grouped.agg({"C": "std", "D": "std"}) expected = concat([exmean, exstd], keys=["mean", "std"], axis=1) expected = expected.swaplevel(0, 1, axis=1).sort_index(level=0, axis=1) - d = {"C": [np.mean, np.std], "D": [np.mean, np.std]} + d = {"C": ["mean", "std"], "D": ["mean", "std"]} result = grouped.aggregate(d) tm.assert_frame_equal(result, expected) # be careful - result = grouped.aggregate({"C": np.mean, "D": [np.mean, np.std]}) - expected = grouped.aggregate({"C": np.mean, "D": [np.mean, np.std]}) + result = grouped.aggregate({"C": "mean", "D": ["mean", "std"]}) + expected = grouped.aggregate({"C": "mean", "D": ["mean", "std"]}) tm.assert_frame_equal(result, expected) def numpymean(x): @@ -453,11 +455,11 @@ def numpystd(x): # this uses column selection & renaming msg = r"nested renamer is not supported" with pytest.raises(SpecificationError, match=msg): - d = {"C": np.mean, "D": {"foo": np.mean, "bar": np.std}} + d = {"C": "mean", "D": {"foo": "mean", "bar": "std"}} grouped.aggregate(d) # But without renaming, these functions are OK - d = {"C": [np.mean], "D": [numpymean, numpystd]} + d = {"C": ["mean"], "D": [numpymean, numpystd]} grouped.aggregate(d) @@ -774,8 +776,8 @@ def test_agg_relabel(self): p98 = functools.partial(np.percentile, q=98) result = df.groupby("group").agg( b_min=("B", "min"), - a_min=("A", min), - a_mean=("A", np.mean), + a_min=("A", "min"), + a_mean=("A", "mean"), a_max=("A", "max"), b_max=("B", "max"), a_98=("A", p98), @@ -880,16 +882,16 @@ def test_mangled(self): [ ( (("y", "A"), "max"), - (("y", 
"A"), np.min), + (("y", "A"), np.mean), (("y", "B"), "mean"), [1, 3], - [0, 2], + [0.5, 2.5], [5.5, 7.5], ), ( (("y", "A"), lambda x: max(x)), (("y", "A"), lambda x: 1), - (("y", "B"), "mean"), + (("y", "B"), np.mean), [1, 3], [1, 1], [5.5, 7.5], @@ -918,9 +920,11 @@ def test_agg_relabel_multiindex_column( expected = DataFrame({"a_max": [1, 3]}, index=idx) tm.assert_frame_equal(result, expected) - result = df.groupby(("x", "group")).agg( - col_1=agg_col1, col_2=agg_col2, col_3=agg_col3 - ) + msg = "is currently using SeriesGroupBy.mean" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby(("x", "group")).agg( + col_1=agg_col1, col_2=agg_col2, col_3=agg_col3 + ) expected = DataFrame( {"col_1": agg_result1, "col_2": agg_result2, "col_3": agg_result3}, index=idx ) diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index 2fb7c8eb03bb0..873e3e73c7cf5 100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -21,6 +21,7 @@ bdate_range, ) import pandas._testing as tm +import pandas.core.common as com @pytest.mark.parametrize( @@ -84,7 +85,10 @@ def test_cython_agg_boolean(): } ) result = frame.groupby("a")["b"].mean() - expected = frame.groupby("a")["b"].agg(np.mean) + msg = "using SeriesGroupBy.mean" + with tm.assert_produces_warning(FutureWarning, match=msg): + # GH#53425 + expected = frame.groupby("a")["b"].agg(np.mean) tm.assert_series_equal(result, expected) @@ -159,7 +163,10 @@ def test_cython_fail_agg(): grouped = ts.groupby(lambda x: x.month) summed = grouped.sum() - expected = grouped.agg(np.sum) + msg = "using SeriesGroupBy.sum" + with tm.assert_produces_warning(FutureWarning, match=msg): + # GH#53425 + expected = grouped.agg(np.sum) tm.assert_series_equal(summed, expected) @@ -182,7 +189,11 @@ def test__cython_agg_general(op, targop): labels = np.random.randint(0, 50, size=1000).astype(float) result = 
df.groupby(labels)._cython_agg_general(op, alt=None, numeric_only=True) - expected = df.groupby(labels).agg(targop) + warn = FutureWarning if targop in com._cython_table else None + msg = f"using DataFrameGroupBy.{op}" + with tm.assert_produces_warning(warn, match=msg): + # GH#53425 + expected = df.groupby(labels).agg(targop) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index aad1218190a84..8772e3cfb45f4 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -77,10 +77,10 @@ def test_agg_datetimes_mixed(): ) df1["weights"] = df1["value"] / df1["value"].sum() - gb1 = df1.groupby("date").aggregate(np.sum) + gb1 = df1.groupby("date").aggregate("sum") df2["weights"] = df1["value"] / df1["value"].sum() - gb2 = df2.groupby("date").aggregate(np.sum) + gb2 = df2.groupby("date").aggregate("sum") assert len(gb1) == len(gb2) @@ -191,12 +191,12 @@ def test_aggregate_api_consistency(): expected.columns = ["sum", "mean"] tm.assert_frame_equal(result, expected, check_like=True) - result = grouped.agg([np.sum, np.mean]) + result = grouped.agg(["sum", "mean"]) expected = pd.concat([c_sum, c_mean, d_sum, d_mean], axis=1) expected.columns = MultiIndex.from_product([["C", "D"], ["sum", "mean"]]) tm.assert_frame_equal(result, expected, check_like=True) - result = grouped[["D", "C"]].agg([np.sum, np.mean]) + result = grouped[["D", "C"]].agg(["sum", "mean"]) expected = pd.concat([d_sum, d_mean, c_sum, c_mean], axis=1) expected.columns = MultiIndex.from_product([["D", "C"], ["sum", "mean"]]) tm.assert_frame_equal(result, expected, check_like=True) @@ -211,7 +211,7 @@ def test_aggregate_api_consistency(): msg = r"Column\(s\) \['r', 'r2'\] do not exist" with pytest.raises(KeyError, match=msg): - grouped[["D", "C"]].agg({"r": np.sum, "r2": np.mean}) + grouped[["D", "C"]].agg({"r": "sum", "r2": "mean"}) def 
test_agg_dict_renaming_deprecation(): @@ -299,7 +299,7 @@ def test_series_agg_multikey(): ts = tm.makeTimeSeries() grouped = ts.groupby([lambda x: x.year, lambda x: x.month]) - result = grouped.agg(np.sum) + result = grouped.agg("sum") expected = grouped.sum() tm.assert_series_equal(result, expected) @@ -406,9 +406,12 @@ def __call__(self, x): fn_class(), ] - expected = df.groupby("foo").agg(sum) + expected = df.groupby("foo").agg("sum") for ecall in equiv_callables: - result = df.groupby("foo").agg(ecall) + warn = FutureWarning if ecall is sum or ecall is np.sum else None + msg = "using DataFrameGroupBy.sum" + with tm.assert_produces_warning(warn, match=msg): + result = df.groupby("foo").agg(ecall) tm.assert_frame_equal(result, expected) @@ -476,7 +479,7 @@ def test_agg_timezone_round_trip(): ts = pd.Timestamp("2016-01-01 12:00:00", tz="US/Pacific") df = DataFrame({"a": 1, "b": [ts + dt.timedelta(minutes=nn) for nn in range(10)]}) - result1 = df.groupby("a")["b"].agg(np.min).iloc[0] + result1 = df.groupby("a")["b"].agg("min").iloc[0] result2 = df.groupby("a")["b"].agg(lambda x: np.min(x)).iloc[0] result3 = df.groupby("a")["b"].min().iloc[0] @@ -580,7 +583,9 @@ def test_agg_category_nansum(observed): df = DataFrame( {"A": pd.Categorical(["a", "a", "b"], categories=categories), "B": [1, 2, 3]} ) - result = df.groupby("A", observed=observed).B.agg(np.nansum) + msg = "using SeriesGroupBy.sum" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby("A", observed=observed).B.agg(np.nansum) expected = Series( [3, 3, 0], index=pd.CategoricalIndex(["a", "b", "c"], categories=categories, name="A"), diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index b25950192018d..3ab62bb7656b7 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -135,23 +135,33 @@ def f(x): df = DataFrame({"a": [5, 15, 25]}) c = pd.cut(df.a, bins=[0, 10, 20, 30, 40]) - result = 
df.a.groupby(c, observed=False).transform(sum) + msg = "using SeriesGroupBy.sum" + with tm.assert_produces_warning(FutureWarning, match=msg): + # GH#53425 + result = df.a.groupby(c, observed=False).transform(sum) tm.assert_series_equal(result, df["a"]) tm.assert_series_equal( df.a.groupby(c, observed=False).transform(lambda xs: np.sum(xs)), df["a"] ) - tm.assert_frame_equal(df.groupby(c, observed=False).transform(sum), df[["a"]]) + msg = "using DataFrameGroupBy.sum" + with tm.assert_produces_warning(FutureWarning, match=msg): + # GH#53425 + result = df.groupby(c, observed=False).transform(sum) + expected = df[["a"]] + tm.assert_frame_equal(result, expected) gbc = df.groupby(c, observed=False) result = gbc.transform(lambda xs: np.max(xs, axis=0)) tm.assert_frame_equal(result, df[["a"]]) - with tm.assert_produces_warning(None): - result2 = gbc.transform(lambda xs: np.max(xs, axis=0)) + result2 = gbc.transform(lambda xs: np.max(xs, axis=0)) + msg = "using DataFrameGroupBy.max" + with tm.assert_produces_warning(FutureWarning, match=msg): + # GH#53425 result3 = gbc.transform(max) - result4 = gbc.transform(np.maximum.reduce) - result5 = gbc.transform(lambda xs: np.maximum.reduce(xs)) + result4 = gbc.transform(np.maximum.reduce) + result5 = gbc.transform(lambda xs: np.maximum.reduce(xs)) tm.assert_frame_equal(result2, df[["a"]], check_dtype=False) tm.assert_frame_equal(result3, df[["a"]], check_dtype=False) tm.assert_frame_equal(result4, df[["a"]]) @@ -165,13 +175,22 @@ def f(x): df = DataFrame({"a": [5, 15, 25, -5]}) c = pd.cut(df.a, bins=[-10, 0, 10, 20, 30, 40]) - result = df.a.groupby(c, observed=False).transform(sum) + msg = "using SeriesGroupBy.sum" + with tm.assert_produces_warning(FutureWarning, match=msg): + # GH#53425 + result = df.a.groupby(c, observed=False).transform(sum) tm.assert_series_equal(result, df["a"]) tm.assert_series_equal( df.a.groupby(c, observed=False).transform(lambda xs: np.sum(xs)), df["a"] ) - tm.assert_frame_equal(df.groupby(c, 
observed=False).transform(sum), df[["a"]]) + msg = "using DataFrameGroupBy.sum" + with tm.assert_produces_warning(FutureWarning, match=msg): + # GH#53425 + result = df.groupby(c, observed=False).transform(sum) + expected = df[["a"]] + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal( df.groupby(c, observed=False).transform(lambda xs: np.sum(xs)), df[["a"]] ) @@ -294,7 +313,10 @@ def test_apply(ordered): result = grouped.mean() tm.assert_frame_equal(result, expected) - result = grouped.agg(np.mean) + msg = "using DataFrameGroupBy.mean" + with tm.assert_produces_warning(FutureWarning, match=msg): + # GH#53425 + result = grouped.agg(np.mean) tm.assert_frame_equal(result, expected) # but for transform we should still get back the original index @@ -1216,7 +1238,10 @@ def test_seriesgroupby_observed_true(df_cat, operation): expected = Series(data=[2, 4, 1, 3], index=index, name="C").sort_index() grouped = df_cat.groupby(["A", "B"], observed=True)["C"] - result = getattr(grouped, operation)(sum) + msg = "using np.sum" if operation == "apply" else "using SeriesGroupBy.sum" + with tm.assert_produces_warning(FutureWarning, match=msg): + # GH#53425 + result = getattr(grouped, operation)(sum) tm.assert_series_equal(result, expected) @@ -1239,7 +1264,10 @@ def test_seriesgroupby_observed_false_or_none(df_cat, observed, operation): with tm.assert_produces_warning(FutureWarning, match=msg): expected = expected.fillna(0, downcast="infer") grouped = df_cat.groupby(["A", "B"], observed=observed)["C"] - result = getattr(grouped, operation)(sum) + msg = "using SeriesGroupBy.sum" if operation == "agg" else "using np.sum" + with tm.assert_produces_warning(FutureWarning, match=msg): + # GH#53425 + result = getattr(grouped, operation)(sum) tm.assert_series_equal(result, expected) @@ -1664,7 +1692,10 @@ def test_categorical_transform(): categories=["Waiting", "OnTheWay", "Delivered"], ordered=True ) df["status"] = df["status"].astype(delivery_status_type) - 
df["last_status"] = df.groupby("package_id")["status"].transform(max) + msg = "using SeriesGroupBy.max" + with tm.assert_produces_warning(FutureWarning, match=msg): + # GH#53425 + df["last_status"] = df.groupby("package_id")["status"].transform(max) result = df.copy() expected = DataFrame( diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 090ed37d7d1b2..e3a5d308c4346 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -57,8 +57,14 @@ def test_intercept_builtin_sum(): s = Series([1.0, 2.0, np.nan, 3.0]) grouped = s.groupby([0, 1, 2, 2]) - result = grouped.agg(builtins.sum) - result2 = grouped.apply(builtins.sum) + msg = "using SeriesGroupBy.sum" + with tm.assert_produces_warning(FutureWarning, match=msg): + # GH#53425 + result = grouped.agg(builtins.sum) + msg = "using np.sum" + with tm.assert_produces_warning(FutureWarning, match=msg): + # GH#53425 + result2 = grouped.apply(builtins.sum) expected = grouped.sum() tm.assert_series_equal(result, expected) tm.assert_series_equal(result2, expected) @@ -78,7 +84,10 @@ def test_builtins_apply(keys, f): warn = None if f is not sum else FutureWarning msg = "The behavior of DataFrame.sum with axis=None is deprecated" - with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False): + with tm.assert_produces_warning( + warn, match=msg, check_stacklevel=False, raise_on_extra_warnings=False + ): + # Also warns on deprecation GH#53425 result = gb.apply(f) ngroups = len(df.drop_duplicates(subset=keys)) @@ -370,11 +379,15 @@ def test_cython_median(): labels[::17] = np.nan result = df.groupby(labels).median() - exp = df.groupby(labels).agg(np.nanmedian) + msg = "using DataFrameGroupBy.median" + with tm.assert_produces_warning(FutureWarning, match=msg): + exp = df.groupby(labels).agg(np.nanmedian) tm.assert_frame_equal(result, exp) df = DataFrame(np.random.randn(1000, 5)) - rs = df.groupby(labels).agg(np.median) + msg = "using 
DataFrameGroupBy.median" + with tm.assert_produces_warning(FutureWarning, match=msg): + rs = df.groupby(labels).agg(np.median) xp = df.groupby(labels).median() tm.assert_frame_equal(rs, xp) @@ -682,7 +695,10 @@ def test_ops_general(op, targop): labels = np.random.randint(0, 50, size=1000).astype(float) result = getattr(df.groupby(labels), op)() - expected = df.groupby(labels).agg(targop) + warn = None if op in ("first", "last", "count", "sem") else FutureWarning + msg = f"using DataFrameGroupBy.{op}" + with tm.assert_produces_warning(warn, match=msg): + expected = df.groupby(labels).agg(targop) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 79fc631fff87c..ca3fec8a99555 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -78,12 +78,21 @@ def test_basic_aggregations(dtype): for k, v in grouped: assert len(v) == 3 - agged = grouped.aggregate(np.mean) + msg = "using SeriesGroupBy.mean" + with tm.assert_produces_warning(FutureWarning, match=msg): + agged = grouped.aggregate(np.mean) assert agged[1] == 1 - tm.assert_series_equal(agged, grouped.agg(np.mean)) # shorthand + msg = "using SeriesGroupBy.mean" + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = grouped.agg(np.mean) + tm.assert_series_equal(agged, expected) # shorthand tm.assert_series_equal(agged, grouped.mean()) - tm.assert_series_equal(grouped.agg(np.sum), grouped.sum()) + result = grouped.sum() + msg = "using SeriesGroupBy.sum" + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = grouped.agg(np.sum) + tm.assert_series_equal(result, expected) expected = grouped.apply(lambda x: x * x.sum()) transformed = grouped.transform(lambda x: x * x.sum()) @@ -91,12 +100,15 @@ def test_basic_aggregations(dtype): tm.assert_series_equal(transformed, expected) value_grouped = data.groupby(data) - tm.assert_series_equal( - 
value_grouped.aggregate(np.mean), agged, check_index_type=False - ) + msg = "using SeriesGroupBy.mean" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = value_grouped.aggregate(np.mean) + tm.assert_series_equal(result, agged, check_index_type=False) # complex agg - agged = grouped.aggregate([np.mean, np.std]) + msg = "using SeriesGroupBy.[mean|std]" + with tm.assert_produces_warning(FutureWarning, match=msg): + agged = grouped.aggregate([np.mean, np.std]) msg = r"nested renamer is not supported" with pytest.raises(SpecificationError, match=msg): @@ -422,14 +434,14 @@ def test_frame_groupby(tsframe): grouped = tsframe.groupby(lambda x: x.weekday()) # aggregate - aggregated = grouped.aggregate(np.mean) + aggregated = grouped.aggregate("mean") assert len(aggregated) == 5 assert len(aggregated.columns) == 4 # by string tscopy = tsframe.copy() tscopy["weekday"] = [x.weekday() for x in tscopy.index] - stragged = tscopy.groupby("weekday").aggregate(np.mean) + stragged = tscopy.groupby("weekday").aggregate("mean") tm.assert_frame_equal(stragged, aggregated, check_names=False) # transform @@ -465,7 +477,7 @@ def test_frame_groupby_columns(tsframe): grouped = tsframe.groupby(mapping, axis=1) # aggregate - aggregated = grouped.aggregate(np.mean) + aggregated = grouped.aggregate("mean") assert len(aggregated) == len(tsframe) assert len(aggregated.columns) == 2 @@ -490,22 +502,22 @@ def test_frame_set_name_single(df): result = df.groupby("A", as_index=False).mean(numeric_only=True) assert result.index.name != "A" - result = grouped[["C", "D"]].agg(np.mean) + result = grouped[["C", "D"]].agg("mean") assert result.index.name == "A" - result = grouped.agg({"C": np.mean, "D": np.std}) + result = grouped.agg({"C": "mean", "D": "std"}) assert result.index.name == "A" result = grouped["C"].mean() assert result.index.name == "A" - result = grouped["C"].agg(np.mean) + result = grouped["C"].agg("mean") assert result.index.name == "A" - result = 
grouped["C"].agg([np.mean, np.std]) + result = grouped["C"].agg(["mean", "std"]) assert result.index.name == "A" msg = r"nested renamer is not supported" with pytest.raises(SpecificationError, match=msg): - grouped["C"].agg({"foo": np.mean, "bar": np.std}) + grouped["C"].agg({"foo": "mean", "bar": "std"}) def test_multi_func(df): @@ -533,14 +545,14 @@ def test_multi_func(df): ) # only verify that it works for now grouped = df.groupby(["k1", "k2"]) - grouped.agg(np.sum) + grouped.agg("sum") def test_multi_key_multiple_functions(df): grouped = df.groupby(["A", "B"])["C"] - agged = grouped.agg([np.mean, np.std]) - expected = DataFrame({"mean": grouped.agg(np.mean), "std": grouped.agg(np.std)}) + agged = grouped.agg(["mean", "std"]) + expected = DataFrame({"mean": grouped.agg("mean"), "std": grouped.agg("std")}) tm.assert_frame_equal(agged, expected) @@ -580,7 +592,7 @@ def test_frame_multi_key_function_list(): ) grouped = data.groupby(["A", "B"]) - funcs = [np.mean, np.std] + funcs = ["mean", "std"] agged = grouped.agg(funcs) expected = pd.concat( [grouped["D"].agg(funcs), grouped["E"].agg(funcs), grouped["F"].agg(funcs)], @@ -641,7 +653,7 @@ def test_frame_multi_key_function_list_partial_failure(): ) grouped = data.groupby(["A", "B"]) - funcs = [np.mean, np.std] + funcs = ["mean", "std"] msg = re.escape("agg function failed [how->mean,dtype->object]") with pytest.raises(TypeError, match=msg): grouped.agg(funcs) @@ -722,11 +734,11 @@ def test_groupby_as_index_agg(df): # single-key - result = grouped[["C", "D"]].agg(np.mean) + result = grouped[["C", "D"]].agg("mean") expected = grouped.mean(numeric_only=True) tm.assert_frame_equal(result, expected) - result2 = grouped.agg({"C": np.mean, "D": np.sum}) + result2 = grouped.agg({"C": "mean", "D": "sum"}) expected2 = grouped.mean(numeric_only=True) expected2["D"] = grouped.sum()["D"] tm.assert_frame_equal(result2, expected2) @@ -735,17 +747,17 @@ def test_groupby_as_index_agg(df): msg = r"nested renamer is not supported" 
with pytest.raises(SpecificationError, match=msg): - grouped["C"].agg({"Q": np.sum}) + grouped["C"].agg({"Q": "sum"}) # multi-key grouped = df.groupby(["A", "B"], as_index=False) - result = grouped.agg(np.mean) + result = grouped.agg("mean") expected = grouped.mean() tm.assert_frame_equal(result, expected) - result2 = grouped.agg({"C": np.mean, "D": np.sum}) + result2 = grouped.agg({"C": "mean", "D": "sum"}) expected2 = grouped.mean() expected2["D"] = grouped.sum()["D"] tm.assert_frame_equal(result2, expected2) @@ -754,7 +766,7 @@ def test_groupby_as_index_agg(df): expected3 = DataFrame(expected3).rename(columns={"C": "Q"}) msg = "Passing a dictionary to SeriesGroupBy.agg is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): - result3 = grouped["C"].agg({"Q": np.sum}) + result3 = grouped["C"].agg({"Q": "sum"}) tm.assert_frame_equal(result3, expected3) # GH7115 & GH8112 & GH8582 @@ -817,13 +829,13 @@ def test_as_index_series_return_frame(df): grouped = df.groupby("A", as_index=False) grouped2 = df.groupby(["A", "B"], as_index=False) - result = grouped["C"].agg(np.sum) - expected = grouped.agg(np.sum).loc[:, ["A", "C"]] + result = grouped["C"].agg("sum") + expected = grouped.agg("sum").loc[:, ["A", "C"]] assert isinstance(result, DataFrame) tm.assert_frame_equal(result, expected) - result2 = grouped2["C"].agg(np.sum) - expected2 = grouped2.agg(np.sum).loc[:, ["A", "B", "C"]] + result2 = grouped2["C"].agg("sum") + expected2 = grouped2.agg("sum").loc[:, ["A", "B", "C"]] assert isinstance(result2, DataFrame) tm.assert_frame_equal(result2, expected2) @@ -928,7 +940,7 @@ def test_raises_on_nuisance(df): grouped = df.groupby("A") msg = re.escape("agg function failed [how->mean,dtype->object]") with pytest.raises(TypeError, match=msg): - grouped.agg(np.mean) + grouped.agg("mean") with pytest.raises(TypeError, match=msg): grouped.mean() @@ -937,7 +949,7 @@ def test_raises_on_nuisance(df): grouped = df.groupby("A") msg = "datetime64 type does not support 
sum operations" with pytest.raises(TypeError, match=msg): - grouped.agg(np.sum) + grouped.agg("sum") with pytest.raises(TypeError, match=msg): grouped.sum() @@ -1009,7 +1021,7 @@ def test_raise_on_nuisance_python_multiple(three_group): grouped = three_group.groupby(["A", "B"]) msg = re.escape("agg function failed [how->mean,dtype->object]") with pytest.raises(TypeError, match=msg): - grouped.agg(np.mean) + grouped.agg("mean") with pytest.raises(TypeError, match=msg): grouped.mean() @@ -1027,13 +1039,13 @@ def test_empty_groups_corner(mframe): ) grouped = df.groupby(["k1", "k2"]) - result = grouped[["v1", "v2"]].agg(np.mean) + result = grouped[["v1", "v2"]].agg("mean") expected = grouped.mean(numeric_only=True) tm.assert_frame_equal(result, expected) grouped = mframe[3:5].groupby(level=0) agged = grouped.apply(lambda x: x.mean()) - agged_A = grouped["A"].apply(np.mean) + agged_A = grouped["A"].apply("mean") tm.assert_series_equal(agged["A"], agged_A) assert agged.index.name == "first" @@ -1052,8 +1064,8 @@ def test_wrap_aggregated_output_multindex(mframe): keys = [np.array([0, 0, 1]), np.array([0, 0, 1])] msg = re.escape("agg function failed [how->mean,dtype->object]") with pytest.raises(TypeError, match=msg): - df.groupby(keys).agg(np.mean) - agged = df.drop(columns=("baz", "two")).groupby(keys).agg(np.mean) + df.groupby(keys).agg("mean") + agged = df.drop(columns=("baz", "two")).groupby(keys).agg("mean") assert isinstance(agged.columns, MultiIndex) def aggfun(ser): @@ -1201,7 +1213,7 @@ def test_groupby_with_hier_columns(): result = gb.mean() tm.assert_index_equal(result.index, df.index) - result = df.groupby(level=0).agg(np.mean) + result = df.groupby(level=0).agg("mean") tm.assert_index_equal(result.columns, columns) result = df.groupby(level=0).apply(lambda x: x.mean()) @@ -1242,7 +1254,7 @@ def test_groupby_wrong_multi_labels(): grouped = data.groupby(["foo", "bar", "baz", "spam"]) - result = grouped.agg(np.mean) + result = grouped.agg("mean") expected = 
grouped.mean() tm.assert_frame_equal(result, expected) @@ -1602,7 +1614,7 @@ def test_no_nonsense_name(float_frame): s = float_frame["C"].copy() s.name = None - result = s.groupby(float_frame["A"]).agg(np.sum) + result = s.groupby(float_frame["A"]).agg("sum") assert result.name is None diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py index ab268a1d94b96..03e3086b8c847 100644 --- a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -231,7 +231,7 @@ def test_groupby_dropna_multi_index_dataframe_agg(dropna, tuples, outputs): ["A", "B", 1, 1, 1.0], ] df = pd.DataFrame(df_list, columns=["a", "b", "c", "d", "e"]) - agg_dict = {"c": sum, "d": max, "e": "min"} + agg_dict = {"c": "sum", "d": "max", "e": "min"} grouped = df.groupby(["a", "b"], dropna=dropna).agg(agg_dict) mi = pd.MultiIndex.from_tuples(tuples, names=list("ab")) @@ -278,7 +278,7 @@ def test_groupby_dropna_datetime_like_data( else: indexes = [datetime1, datetime2, np.nan] - grouped = df.groupby("dt", dropna=dropna).agg({"values": sum}) + grouped = df.groupby("dt", dropna=dropna).agg({"values": "sum"}) expected = pd.DataFrame({"values": values}, index=pd.Index(indexes, name="dt")) tm.assert_frame_equal(grouped, expected) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 41b7dde4bf631..1e9c4b446c4d0 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -63,7 +63,7 @@ def test_column_select_via_attr(self, df): df["mean"] = 1.5 result = df.groupby("A").mean(numeric_only=True) - expected = df.groupby("A")[["C", "D", "mean"]].agg(np.mean) + expected = df.groupby("A")[["C", "D", "mean"]].agg("mean") tm.assert_frame_equal(result, expected) def test_getitem_list_of_columns(self): @@ -399,15 +399,15 @@ def test_groupby_grouper(self, df): def test_groupby_dict_mapping(self): # GH #679 s = Series({"T1": 5}) - result = s.groupby({"T1": 
"T2"}).agg(sum) - expected = s.groupby(["T2"]).agg(sum) + result = s.groupby({"T1": "T2"}).agg("sum") + expected = s.groupby(["T2"]).agg("sum") tm.assert_series_equal(result, expected) s = Series([1.0, 2.0, 3.0, 4.0], index=list("abcd")) mapping = {"a": 0, "b": 0, "c": 1, "d": 1} result = s.groupby(mapping).mean() - result2 = s.groupby(mapping).agg(np.mean) + result2 = s.groupby(mapping).agg("mean") exp_key = np.array([0, 0, 1, 1], dtype=np.int64) expected = s.groupby(exp_key).mean() expected2 = s.groupby(exp_key).mean() diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py index 180755c1dca12..a3fa5bf794030 100644 --- a/pandas/tests/groupby/test_raises.py +++ b/pandas/tests/groupby/test_raises.py @@ -229,7 +229,12 @@ def test_groupby_raises_string_np( ), }[groupby_func_np] - _call_and_check(klass, msg, how, gb, groupby_func_np, ()) + if groupby_series: + warn_msg = "using SeriesGroupBy.[sum|mean]" + else: + warn_msg = "using DataFrameGroupBy.[sum|mean]" + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + _call_and_check(klass, msg, how, gb, groupby_func_np, ()) @pytest.mark.parametrize("how", ["method", "agg", "transform"]) @@ -333,7 +338,12 @@ def test_groupby_raises_datetime_np( np.mean: (None, ""), }[groupby_func_np] - _call_and_check(klass, msg, how, gb, groupby_func_np, ()) + if groupby_series: + warn_msg = "using SeriesGroupBy.[sum|mean]" + else: + warn_msg = "using DataFrameGroupBy.[sum|mean]" + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + _call_and_check(klass, msg, how, gb, groupby_func_np, ()) @pytest.mark.parametrize("func", ["prod", "cumprod", "skew", "var"]) @@ -526,7 +536,12 @@ def test_groupby_raises_category_np( ), }[groupby_func_np] - _call_and_check(klass, msg, how, gb, groupby_func_np, ()) + if groupby_series: + warn_msg = "using SeriesGroupBy.[sum|mean]" + else: + warn_msg = "using DataFrameGroupBy.[sum|mean]" + with tm.assert_produces_warning(FutureWarning, match=warn_msg): 
+ _call_and_check(klass, msg, how, gb, groupby_func_np, ()) @pytest.mark.parametrize("how", ["method", "agg", "transform"]) diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index 04b99939514e6..60c35064d9aa7 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -725,7 +725,7 @@ def test_groupby_datetime64_32_bit(self): # 32-bit under 1.9-dev indexing issue df = DataFrame({"A": range(2), "B": [Timestamp("2000-01-1")] * 2}) - result = df.groupby("A")["B"].transform(min) + result = df.groupby("A")["B"].transform("min") expected = Series([Timestamp("2000-01-1")] * 2, name="B") tm.assert_series_equal(result, expected) @@ -918,11 +918,11 @@ def test_groupby_agg_numba_timegrouper_with_nat( lambda values, index: np.nanmean(values), engine="numba" ) - expected = gb["Quantity"].aggregate(np.nanmean) + expected = gb["Quantity"].aggregate("mean") tm.assert_series_equal(result, expected) result_df = gb[["Quantity"]].aggregate( lambda values, index: np.nanmean(values), engine="numba" ) - expected_df = gb[["Quantity"]].aggregate(np.nanmean) + expected_df = gb[["Quantity"]].aggregate("mean") tm.assert_frame_equal(result_df, expected_df) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index cf41b4ff57331..c84ee3114b71f 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -75,7 +75,9 @@ def demean(arr): # GH 9700 df = DataFrame({"a": range(5, 10), "b": range(5)}) - result = df.groupby("a").transform(max) + msg = "using DataFrameGroupBy.max" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby("a").transform(max) expected = DataFrame({"b": range(5)}) tm.assert_frame_equal(result, expected) @@ -88,7 +90,9 @@ def test_transform_fast(): values = np.repeat(grp.mean().values, ensure_platform_int(grp.count().values)) expected = 
Series(values, index=df.index, name="val") - result = grp.transform(np.mean) + msg = "using SeriesGroupBy.mean" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = grp.transform(np.mean) tm.assert_series_equal(result, expected) result = grp.transform("mean") @@ -132,14 +136,18 @@ def test_transform_fast(): def test_transform_broadcast(tsframe, ts): grouped = ts.groupby(lambda x: x.month) - result = grouped.transform(np.mean) + msg = "using SeriesGroupBy.mean" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = grouped.transform(np.mean) tm.assert_index_equal(result.index, ts.index) for _, gp in grouped: assert_fp_equal(result.reindex(gp.index), gp.mean()) grouped = tsframe.groupby(lambda x: x.month) - result = grouped.transform(np.mean) + msg = "using DataFrameGroupBy.mean" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = grouped.transform(np.mean) tm.assert_index_equal(result.index, tsframe.index) for _, gp in grouped: agged = gp.mean(axis=0) @@ -151,7 +159,9 @@ def test_transform_broadcast(tsframe, ts): msg = "DataFrame.groupby with axis=1 is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): grouped = tsframe.groupby({"A": 0, "B": 0, "C": 1, "D": 1}, axis=1) - result = grouped.transform(np.mean) + msg = "using DataFrameGroupBy.mean" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = grouped.transform(np.mean) tm.assert_index_equal(result.index, tsframe.index) tm.assert_index_equal(result.columns, tsframe.columns) for _, gp in grouped: @@ -348,7 +358,10 @@ def test_transform_multiple(ts): grouped = ts.groupby([lambda x: x.year, lambda x: x.month]) grouped.transform(lambda x: x * 2) - grouped.transform(np.mean) + + msg = "using SeriesGroupBy.mean" + with tm.assert_produces_warning(FutureWarning, match=msg): + grouped.transform(np.mean) def test_dispatch_transform(tsframe): @@ -464,11 +477,15 @@ def test_transform_nuisance_raises(df): def 
test_transform_function_aliases(df): result = df.groupby("A").transform("mean", numeric_only=True) - expected = df.groupby("A")[["C", "D"]].transform(np.mean) + msg = "using DataFrameGroupBy.mean" + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = df.groupby("A")[["C", "D"]].transform(np.mean) tm.assert_frame_equal(result, expected) result = df.groupby("A")["C"].transform("mean") - expected = df.groupby("A")["C"].transform(np.mean) + msg = "using SeriesGroupBy.mean" + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = df.groupby("A")["C"].transform(np.mean) tm.assert_series_equal(result, expected) @@ -496,12 +513,14 @@ def test_transform_length(): def nsum(x): return np.nansum(x) - results = [ - df.groupby("col1").transform(sum)["col2"], - df.groupby("col1")["col2"].transform(sum), - df.groupby("col1").transform(nsum)["col2"], - df.groupby("col1")["col2"].transform(nsum), - ] + msg = "using DataFrameGroupBy.sum" + with tm.assert_produces_warning(FutureWarning, match=msg): + results = [ + df.groupby("col1").transform(sum)["col2"], + df.groupby("col1")["col2"].transform(sum), + df.groupby("col1").transform(nsum)["col2"], + df.groupby("col1")["col2"].transform(nsum), + ] for result in results: tm.assert_series_equal(result, expected, check_names=False) @@ -513,7 +532,9 @@ def test_transform_coercion(): df = DataFrame({"A": ["a", "a", "b", "b"], "B": [0, 1, 3, 4]}) g = df.groupby("A") - expected = g.transform(np.mean) + msg = "using DataFrameGroupBy.mean" + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = g.transform(np.mean) result = g.transform(lambda x: np.mean(x, axis=0)) tm.assert_frame_equal(result, expected) @@ -584,7 +605,9 @@ def test_groupby_transform_with_int(): def test_groupby_transform_with_nan_group(): # GH 9941 df = DataFrame({"a": range(10), "b": [1, 1, 2, 3, np.nan, 4, 4, 5, 5, 5]}) - result = df.groupby(df.b)["a"].transform(max) + msg = "using SeriesGroupBy.max" + with 
tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby(df.b)["a"].transform(max) expected = Series([1.0, 1.0, 2.0, 3.0, np.nan, 6.0, 6.0, 9.0, 9.0, 9.0], name="a") tm.assert_series_equal(result, expected) @@ -1085,7 +1108,9 @@ def test_any_all_np_func(func): exp = Series([True, np.nan, True], name="val") - res = df.groupby("key")["val"].transform(func) + msg = "using SeriesGroupBy.[any|all]" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = df.groupby("key")["val"].transform(func) tm.assert_series_equal(res, exp) @@ -1115,7 +1140,10 @@ def test_groupby_transform_timezone_column(func): # GH 24198 ts = pd.to_datetime("now", utc=True).tz_convert("Asia/Singapore") result = DataFrame({"end_time": [ts], "id": [1]}) - result["max_end_time"] = result.groupby("id").end_time.transform(func) + warn = FutureWarning if not isinstance(func, str) else None + msg = "using SeriesGroupBy.[min|max]" + with tm.assert_produces_warning(warn, match=msg): + result["max_end_time"] = result.groupby("id").end_time.transform(func) expected = DataFrame([[ts, 1, ts]], columns=["end_time", "id", "max_end_time"]) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index b82afab49954d..e57d938f060df 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -299,7 +299,7 @@ def test_apply_to_empty_series(empty_series_dti, freq): return result = ser.resample(freq, group_keys=False).apply(lambda x: 1) - expected = ser.resample(freq).apply(np.sum) + expected = ser.resample(freq).apply("sum") tm.assert_series_equal(result, expected, check_dtype=False) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 0f52f2d1c65ee..3c6f75bdcfc46 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -87,7 +87,7 @@ def test_custom_grouper(index, unit): expect 
= Series(arr, index=idx) # GH2763 - return input dtype if we can - result = g.agg(np.sum) + result = g.agg("sum") tm.assert_series_equal(result, expect) @@ -95,7 +95,7 @@ def test_custom_grouper_df(index, unit): b = Grouper(freq=Minute(5), closed="right", label="right") dti = index.as_unit(unit) df = DataFrame(np.random.rand(len(dti), 10), index=dti, dtype="float64") - r = df.groupby(b).agg(np.sum) + r = df.groupby(b).agg("sum") assert len(r.columns) == 10 assert len(r.index) == 2593 @@ -1847,7 +1847,9 @@ def test_resample_apply_product(duplicates, unit): if duplicates: df.columns = ["A", "A"] - result = df.resample("Q").apply(np.prod) + msg = "using DatetimeIndexResampler.prod" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.resample("Q").apply(np.prod) expected = DataFrame( np.array([[0, 24], [60, 210], [336, 720], [990, 1716]], dtype=np.int64), index=DatetimeIndex( diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index 9c3ccd96a8d59..20b997bdca873 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -524,7 +524,7 @@ def test_resample_tz_localized(self): ) result = ( ts.resample("A") - .agg({"first": np.sum, "second": np.mean}) + .agg({"first": "sum", "second": "mean"}) .reindex(columns=["first", "second"]) ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index dbd28868b81b1..6aa59d8b3d164 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -401,6 +401,7 @@ def test_agg(): expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]]) + msg = "using SeriesGroupBy.[mean|std]" for t in cases: # In case 2, "date" is an index and a column, so get included in the agg if t == cases[2]: @@ -410,21 +411,26 @@ def 
test_agg(): exp.columns = pd.MultiIndex.from_product( [["date", "A", "B"], ["mean", "std"]] ) - result = t.aggregate([np.mean, np.std]) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = t.aggregate([np.mean, np.std]) tm.assert_frame_equal(result, exp) else: - result = t.aggregate([np.mean, np.std]) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = t.aggregate([np.mean, np.std]) tm.assert_frame_equal(result, expected) expected = pd.concat([a_mean, b_std], axis=1) for t in cases: - result = t.aggregate({"A": np.mean, "B": np.std}) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = t.aggregate({"A": np.mean, "B": np.std}) tm.assert_frame_equal(result, expected, check_like=True) - result = t.aggregate(A=("A", np.mean), B=("B", np.std)) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = t.aggregate(A=("A", np.mean), B=("B", np.std)) tm.assert_frame_equal(result, expected, check_like=True) - result = t.aggregate(A=NamedAgg("A", np.mean), B=NamedAgg("B", np.std)) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = t.aggregate(A=NamedAgg("A", np.mean), B=NamedAgg("B", np.std)) tm.assert_frame_equal(result, expected, check_like=True) expected = pd.concat([a_mean, a_std], axis=1) @@ -501,18 +507,22 @@ def test_agg_misc(): ] # passed lambda + msg = "using SeriesGroupBy.sum" for t in cases: - result = t.agg({"A": np.sum, "B": lambda x: np.std(x, ddof=1)}) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = t.agg({"A": np.sum, "B": lambda x: np.std(x, ddof=1)}) rcustom = t["B"].apply(lambda x: np.std(x, ddof=1)) expected = pd.concat([r["A"].sum(), rcustom], axis=1) tm.assert_frame_equal(result, expected, check_like=True) - result = t.agg(A=("A", np.sum), B=("B", lambda x: np.std(x, ddof=1))) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = t.agg(A=("A", np.sum), B=("B", lambda x: np.std(x, ddof=1))) tm.assert_frame_equal(result, 
expected, check_like=True) - result = t.agg( - A=NamedAgg("A", np.sum), B=NamedAgg("B", lambda x: np.std(x, ddof=1)) - ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = t.agg( + A=NamedAgg("A", np.sum), B=NamedAgg("B", lambda x: np.std(x, ddof=1)) + ) tm.assert_frame_equal(result, expected, check_like=True) # agg with renamers diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index a5fb48f801522..2cd47296d5cab 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -54,7 +54,9 @@ def test_count(test_series): def test_numpy_reduction(test_series): result = test_series.resample("A", closed="right").prod() - expected = test_series.groupby(lambda x: x.year).agg(np.prod) + msg = "using SeriesGroupBy.prod" + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = test_series.groupby(lambda x: x.year).agg(np.prod) expected.index = result.index tm.assert_series_equal(result, expected) diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index ffdff75e53cf7..179748f0506b5 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -425,7 +425,7 @@ def test_join_hierarchical_mixed_raises(self): # GH 2024 # GH 40993: For raising, enforced in 2.0 df = DataFrame([(1, 2, 3), (4, 5, 6)], columns=["a", "b", "c"]) - new_df = df.groupby(["a"]).agg({"b": [np.mean, np.sum]}) + new_df = df.groupby(["a"]).agg({"b": ["mean", "sum"]}) other_df = DataFrame([(1, 2, 3), (7, 10, 6)], columns=["a", "b", "d"]) other_df.set_index("a", inplace=True) # GH 9455, 12219 diff --git a/pandas/tests/reshape/test_crosstab.py b/pandas/tests/reshape/test_crosstab.py index 1bcc86c4908a2..382c102f1194f 100644 --- a/pandas/tests/reshape/test_crosstab.py +++ b/pandas/tests/reshape/test_crosstab.py @@ -212,13 +212,13 @@ def test_crosstab_pass_values(self): values = np.random.randn(100) 
table = crosstab( - [a, b], c, values, aggfunc=np.sum, rownames=["foo", "bar"], colnames=["baz"] + [a, b], c, values, aggfunc="sum", rownames=["foo", "bar"], colnames=["baz"] ) df = DataFrame({"foo": a, "bar": b, "baz": c, "values": values}) expected = df.pivot_table( - "values", index=["foo", "bar"], columns="baz", aggfunc=np.sum + "values", index=["foo", "bar"], columns="baz", aggfunc="sum" ) tm.assert_frame_equal(table, expected) @@ -452,9 +452,11 @@ def test_crosstab_normalize_arrays(self): index=Index([1, 2, "All"], name="a", dtype="object"), columns=Index([3, 4, "All"], name="b", dtype="object"), ) - test_case = crosstab( - df.a, df.b, df.c, aggfunc=np.sum, normalize="all", margins=True - ) + msg = "using DataFrameGroupBy.sum" + with tm.assert_produces_warning(FutureWarning, match=msg): + test_case = crosstab( + df.a, df.b, df.c, aggfunc=np.sum, normalize="all", margins=True + ) tm.assert_frame_equal(test_case, norm_sum) def test_crosstab_with_empties(self, using_array_manager): @@ -655,14 +657,17 @@ def test_crosstab_normalize_multiple_columns(self): "E": [0] * 24, } ) - result = crosstab( - [df.A, df.B], - df.C, - values=df.D, - aggfunc=np.sum, - normalize=True, - margins=True, - ) + + msg = "using DataFrameGroupBy.sum" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = crosstab( + [df.A, df.B], + df.C, + values=df.D, + aggfunc=np.sum, + normalize=True, + margins=True, + ) expected = DataFrame( np.array([0] * 29 + [1], dtype=float).reshape(10, 3), columns=Index(["bar", "foo", "All"], dtype="object", name="C"), diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index b6fcb27faf146..1e122442cd40c 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -114,7 +114,7 @@ def test_pivot_table(self, observed, data): else: assert table.columns.name == columns[0] - expected = data.groupby(index + [columns])["D"].agg(np.mean).unstack() + expected = data.groupby(index + 
[columns])["D"].agg("mean").unstack() tm.assert_frame_equal(table, expected) def test_pivot_table_categorical_observed_equal(self, observed): @@ -124,7 +124,7 @@ def test_pivot_table_categorical_observed_equal(self, observed): ) expected = df.pivot_table( - index="col1", values="col3", columns="col2", aggfunc=np.sum, fill_value=0 + index="col1", values="col3", columns="col2", aggfunc="sum", fill_value=0 ) expected.index = expected.index.astype("category") @@ -137,7 +137,7 @@ def test_pivot_table_categorical_observed_equal(self, observed): index="col1", values="col3", columns="col2", - aggfunc=np.sum, + aggfunc="sum", fill_value=0, observed=observed, ) @@ -148,8 +148,8 @@ def test_pivot_table_nocols(self): df = DataFrame( {"rows": ["a", "b", "c"], "cols": ["x", "y", "z"], "values": [1, 2, 3]} ) - rs = df.pivot_table(columns="cols", aggfunc=np.sum) - xp = df.pivot_table(index="cols", aggfunc=np.sum).T + rs = df.pivot_table(columns="cols", aggfunc="sum") + xp = df.pivot_table(index="cols", aggfunc="sum").T tm.assert_frame_equal(rs, xp) rs = df.pivot_table(columns="cols", aggfunc={"values": "mean"}) @@ -345,7 +345,7 @@ def test_pivot_table_multiple(self, data): index = ["A", "B"] columns = "C" table = pivot_table(data, index=index, columns=columns) - expected = data.groupby(index + [columns]).agg(np.mean).unstack() + expected = data.groupby(index + [columns]).agg("mean").unstack() tm.assert_frame_equal(table, expected) def test_pivot_dtypes(self): @@ -360,7 +360,7 @@ def test_pivot_dtypes(self): assert f.dtypes["v"] == "int64" z = pivot_table( - f, values="v", index=["a"], columns=["i"], fill_value=0, aggfunc=np.sum + f, values="v", index=["a"], columns=["i"], fill_value=0, aggfunc="sum" ) result = z.dtypes expected = Series([np.dtype("int64")] * 2, index=Index(list("ab"), name="i")) @@ -377,7 +377,7 @@ def test_pivot_dtypes(self): assert f.dtypes["v"] == "float64" z = pivot_table( - f, values="v", index=["a"], columns=["i"], fill_value=0, aggfunc=np.mean + f, 
values="v", index=["a"], columns=["i"], fill_value=0, aggfunc="mean" ) result = z.dtypes expected = Series([np.dtype("float64")] * 2, index=Index(list("ab"), name="i")) @@ -461,9 +461,9 @@ def test_pivot_multi_functions(self, data): f = lambda func: pivot_table( data, values=["D", "E"], index=["A", "B"], columns="C", aggfunc=func ) - result = f([np.mean, np.std]) - means = f(np.mean) - stds = f(np.std) + result = f(["mean", "std"]) + means = f("mean") + stds = f("std") expected = concat([means, stds], keys=["mean", "std"], axis=1) tm.assert_frame_equal(result, expected) @@ -476,9 +476,9 @@ def test_pivot_multi_functions(self, data): aggfunc=func, margins=True, ) - result = f([np.mean, np.std]) - means = f(np.mean) - stds = f(np.std) + result = f(["mean", "std"]) + means = f("mean") + stds = f("std") expected = concat([means, stds], keys=["mean", "std"], axis=1) tm.assert_frame_equal(result, expected) @@ -633,7 +633,7 @@ def test_pivot_tz_in_values(self): values="ts", index=["uid"], columns=[mins], - aggfunc=np.min, + aggfunc="min", ) expected = DataFrame( [ @@ -897,7 +897,7 @@ def _check_output( def test_margins(self, data): # column specified result = data.pivot_table( - values="D", index=["A", "B"], columns="C", margins=True, aggfunc=np.mean + values="D", index=["A", "B"], columns="C", margins=True, aggfunc="mean" ) self._check_output(result, "D", data) @@ -907,14 +907,14 @@ def test_margins(self, data): index=["A", "B"], columns="C", margins=True, - aggfunc=np.mean, + aggfunc="mean", margins_name="Totals", ) self._check_output(result, "D", data, margins_col="Totals") # no column specified table = data.pivot_table( - index=["A", "B"], columns="C", margins=True, aggfunc=np.mean + index=["A", "B"], columns="C", margins=True, aggfunc="mean" ) for value_col in table.columns.levels[0]: self._check_output(table[value_col], value_col, data) @@ -926,9 +926,9 @@ def test_no_col(self, data): data.columns = [k * 2 for k in data.columns] msg = re.escape("agg function failed 
[how->mean,dtype->object]") with pytest.raises(TypeError, match=msg): - data.pivot_table(index=["AA", "BB"], margins=True, aggfunc=np.mean) + data.pivot_table(index=["AA", "BB"], margins=True, aggfunc="mean") table = data.drop(columns="CC").pivot_table( - index=["AA", "BB"], margins=True, aggfunc=np.mean + index=["AA", "BB"], margins=True, aggfunc="mean" ) for value_col in table.columns: totals = table.loc[("All", ""), value_col] @@ -948,7 +948,7 @@ def test_no_col(self, data): [ ( "A", - np.mean, + "mean", [[5.5, 5.5, 2.2, 2.2], [8.0, 8.0, 4.4, 4.4]], Index(["bar", "All", "foo", "All"], name="A"), ), @@ -1027,7 +1027,7 @@ def test_margins_dtype(self, data): index=["A", "B"], columns="C", margins=True, - aggfunc=np.sum, + aggfunc="sum", fill_value=0, ) @@ -1274,7 +1274,7 @@ def test_pivot_timegrouper(self, using_array_manager): index=Grouper(freq="A"), columns="Buyer", values="Quantity", - aggfunc=np.sum, + aggfunc="sum", ) tm.assert_frame_equal(result, expected) @@ -1283,7 +1283,7 @@ def test_pivot_timegrouper(self, using_array_manager): index="Buyer", columns=Grouper(freq="A"), values="Quantity", - aggfunc=np.sum, + aggfunc="sum", ) tm.assert_frame_equal(result, expected.T) @@ -1305,7 +1305,7 @@ def test_pivot_timegrouper(self, using_array_manager): index=Grouper(freq="6MS"), columns="Buyer", values="Quantity", - aggfunc=np.sum, + aggfunc="sum", ) tm.assert_frame_equal(result, expected) @@ -1314,7 +1314,7 @@ def test_pivot_timegrouper(self, using_array_manager): index="Buyer", columns=Grouper(freq="6MS"), values="Quantity", - aggfunc=np.sum, + aggfunc="sum", ) tm.assert_frame_equal(result, expected.T) @@ -1325,7 +1325,7 @@ def test_pivot_timegrouper(self, using_array_manager): index=Grouper(freq="6MS", key="Date"), columns="Buyer", values="Quantity", - aggfunc=np.sum, + aggfunc="sum", ) tm.assert_frame_equal(result, expected) @@ -1334,7 +1334,7 @@ def test_pivot_timegrouper(self, using_array_manager): index="Buyer", columns=Grouper(freq="6MS", key="Date"), 
values="Quantity", - aggfunc=np.sum, + aggfunc="sum", ) tm.assert_frame_equal(result, expected.T) @@ -1345,7 +1345,7 @@ def test_pivot_timegrouper(self, using_array_manager): index=Grouper(freq="6MS", key="foo"), columns="Buyer", values="Quantity", - aggfunc=np.sum, + aggfunc="sum", ) with pytest.raises(KeyError, match=msg): pivot_table( @@ -1353,7 +1353,7 @@ def test_pivot_timegrouper(self, using_array_manager): index="Buyer", columns=Grouper(freq="6MS", key="foo"), values="Quantity", - aggfunc=np.sum, + aggfunc="sum", ) # passing the level @@ -1363,7 +1363,7 @@ def test_pivot_timegrouper(self, using_array_manager): index=Grouper(freq="6MS", level="Date"), columns="Buyer", values="Quantity", - aggfunc=np.sum, + aggfunc="sum", ) tm.assert_frame_equal(result, expected) @@ -1372,7 +1372,7 @@ def test_pivot_timegrouper(self, using_array_manager): index="Buyer", columns=Grouper(freq="6MS", level="Date"), values="Quantity", - aggfunc=np.sum, + aggfunc="sum", ) tm.assert_frame_equal(result, expected.T) @@ -1383,7 +1383,7 @@ def test_pivot_timegrouper(self, using_array_manager): index=Grouper(freq="6MS", level="foo"), columns="Buyer", values="Quantity", - aggfunc=np.sum, + aggfunc="sum", ) with pytest.raises(ValueError, match=msg): pivot_table( @@ -1391,7 +1391,7 @@ def test_pivot_timegrouper(self, using_array_manager): index="Buyer", columns=Grouper(freq="6MS", level="foo"), values="Quantity", - aggfunc=np.sum, + aggfunc="sum", ) def test_pivot_timegrouper_double(self): @@ -1429,7 +1429,7 @@ def test_pivot_timegrouper_double(self): index=Grouper(freq="M", key="Date"), columns=Grouper(freq="M", key="PayDay"), values="Quantity", - aggfunc=np.sum, + aggfunc="sum", ) expected = DataFrame( np.array( @@ -1481,7 +1481,7 @@ def test_pivot_timegrouper_double(self): index=Grouper(freq="M", key="PayDay"), columns=Grouper(freq="M", key="Date"), values="Quantity", - aggfunc=np.sum, + aggfunc="sum", ) tm.assert_frame_equal(result, expected.T) @@ -1508,7 +1508,7 @@ def 
test_pivot_timegrouper_double(self): index=[Grouper(freq="M", key="Date"), Grouper(freq="M", key="PayDay")], columns=["Branch"], values="Quantity", - aggfunc=np.sum, + aggfunc="sum", ) tm.assert_frame_equal(result, expected) @@ -1517,7 +1517,7 @@ def test_pivot_timegrouper_double(self): index=["Branch"], columns=[Grouper(freq="M", key="Date"), Grouper(freq="M", key="PayDay")], values="Quantity", - aggfunc=np.sum, + aggfunc="sum", ) tm.assert_frame_equal(result, expected.T) @@ -1588,7 +1588,7 @@ def test_pivot_datetime_tz(self): index=["dt1"], columns=["dt2"], values=["value1", "value2"], - aggfunc=[np.sum, np.mean], + aggfunc=["sum", "mean"], ) tm.assert_frame_equal(result, expected) @@ -1749,7 +1749,7 @@ def test_pivot_table_margins_name_with_aggfunc_list(self): columns="day", margins=True, margins_name=margins_name, - aggfunc=[np.mean, max], + aggfunc=["mean", "max"], ) ix = Index(["bacon", "cheese", margins_name], dtype="object", name="item") tups = [ @@ -1927,13 +1927,13 @@ def test_pivot_table_not_series(self): # and aggfunc is not instance of list df = DataFrame({"col1": [3, 4, 5], "col2": ["C", "D", "E"], "col3": [1, 3, 9]}) - result = df.pivot_table("col1", index=["col3", "col2"], aggfunc=np.sum) + result = df.pivot_table("col1", index=["col3", "col2"], aggfunc="sum") m = MultiIndex.from_arrays([[1, 3, 9], ["C", "D", "E"]], names=["col3", "col2"]) expected = DataFrame([3, 4, 5], index=m, columns=["col1"]) tm.assert_frame_equal(result, expected) - result = df.pivot_table("col1", index="col3", columns="col2", aggfunc=np.sum) + result = df.pivot_table("col1", index="col3", columns="col2", aggfunc="sum") expected = DataFrame( [[3, np.NaN, np.NaN], [np.NaN, 4, np.NaN], [np.NaN, np.NaN, 5]], index=Index([1, 3, 9], name="col3"), @@ -1942,7 +1942,7 @@ def test_pivot_table_not_series(self): tm.assert_frame_equal(result, expected) - result = df.pivot_table("col1", index="col3", aggfunc=[np.sum]) + result = df.pivot_table("col1", index="col3", aggfunc=["sum"]) m = 
MultiIndex.from_arrays([["sum"], ["col1"]]) expected = DataFrame([3, 4, 5], index=Index([1, 3, 9], name="col3"), columns=m) @@ -2037,7 +2037,10 @@ def test_pivot_string_func_vs_func(self, f, f_numpy, data): # for consistency purposes data = data.drop(columns="C") result = pivot_table(data, index="A", columns="B", aggfunc=f) - expected = pivot_table(data, index="A", columns="B", aggfunc=f_numpy) + ops = "|".join(f) if isinstance(f, list) else f + msg = f"using DataFrameGroupBy.[{ops}]" + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = pivot_table(data, index="A", columns="B", aggfunc=f_numpy) tm.assert_frame_equal(result, expected) @pytest.mark.slow @@ -2104,7 +2107,7 @@ def test_pivot_table_aggfunc_scalar_dropna(self, dropna): {"A": ["one", "two", "one"], "x": [3, np.nan, 2], "y": [1, np.nan, np.nan]} ) - result = pivot_table(df, columns="A", aggfunc=np.mean, dropna=dropna) + result = pivot_table(df, columns="A", aggfunc="mean", dropna=dropna) data = [[2.5, np.nan], [1, np.nan]] col = Index(["one", "two"], name="A") @@ -2172,7 +2175,7 @@ def test_pivot_table_multiindex_columns_doctest_case(self): df, values=["D", "E"], index=["A", "C"], - aggfunc={"D": np.mean, "E": [min, max, np.mean]}, + aggfunc={"D": "mean", "E": ["min", "max", "mean"]}, ) cols = MultiIndex.from_tuples( [("D", "mean"), ("E", "max"), ("E", "mean"), ("E", "min")] @@ -2374,7 +2377,7 @@ def test_pivot_table_with_mixed_nested_tuples(self, using_array_manager): } ) result = pivot_table( - df, values="D", index=["A", "B"], columns=[(7, "seven")], aggfunc=np.sum + df, values="D", index=["A", "B"], columns=[(7, "seven")], aggfunc="sum" ) expected = DataFrame( [[4.0, 5.0], [7.0, 6.0], [4.0, 1.0], [np.nan, 6.0]], diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 8c5f9a894f2f7..43f1f5527c8e2 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -19,13 +19,13 @@ def test_reindex_level(self, 
multiindex_year_month_day_dataframe_random_data): month_sums = ymd.groupby("month").sum() result = month_sums.reindex(ymd.index, level=1) - expected = ymd.groupby(level="month").transform(np.sum) + expected = ymd.groupby(level="month").transform("sum") tm.assert_frame_equal(result, expected) # Series result = month_sums["A"].reindex(ymd.index, level=1) - expected = ymd["A"].groupby(level="month").transform(np.sum) + expected = ymd["A"].groupby(level="month").transform("sum") tm.assert_series_equal(result, expected, check_names=False) # axis=1 @@ -35,7 +35,7 @@ def test_reindex_level(self, multiindex_year_month_day_dataframe_random_data): month_sums = gb.sum() result = month_sums.reindex(columns=ymd.index, level=1) - expected = ymd.groupby(level="month").transform(np.sum).T + expected = ymd.groupby(level="month").transform("sum").T tm.assert_frame_equal(result, expected) def test_reindex(self, multiindex_dataframe_random_data): diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py index d6cca5061671b..6b7093b4e4c3c 100644 --- a/pandas/tests/window/test_api.py +++ b/pandas/tests/window/test_api.py @@ -85,12 +85,14 @@ def test_agg(step): b_mean = r["B"].mean() b_std = r["B"].std() - result = r.aggregate([np.mean, np.std]) + with tm.assert_produces_warning(FutureWarning, match="using Rolling.[mean|std]"): + result = r.aggregate([np.mean, np.std]) expected = concat([a_mean, a_std, b_mean, b_std], axis=1) expected.columns = MultiIndex.from_product([["A", "B"], ["mean", "std"]]) tm.assert_frame_equal(result, expected) - result = r.aggregate({"A": np.mean, "B": np.std}) + with tm.assert_produces_warning(FutureWarning, match="using Rolling.[mean|std]"): + result = r.aggregate({"A": np.mean, "B": np.std}) expected = concat([a_mean, b_std], axis=1) tm.assert_frame_equal(result, expected, check_like=True) @@ -143,7 +145,8 @@ def test_agg_apply(raw): r = df.rolling(window=3) a_sum = r["A"].sum() - result = r.agg({"A": np.sum, "B": lambda x: np.std(x, 
ddof=1)}) + with tm.assert_produces_warning(FutureWarning, match="using Rolling.[sum|std]"): + result = r.agg({"A": np.sum, "B": lambda x: np.std(x, ddof=1)}) rcustom = r["B"].apply(lambda x: np.std(x, ddof=1), raw=raw) expected = concat([a_sum, rcustom], axis=1) tm.assert_frame_equal(result, expected, check_like=True) @@ -153,15 +156,18 @@ def test_agg_consistency(step): df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) r = df.rolling(window=3, step=step) - result = r.agg([np.sum, np.mean]).columns + with tm.assert_produces_warning(FutureWarning, match="using Rolling.[sum|mean]"): + result = r.agg([np.sum, np.mean]).columns expected = MultiIndex.from_product([list("AB"), ["sum", "mean"]]) tm.assert_index_equal(result, expected) - result = r["A"].agg([np.sum, np.mean]).columns + with tm.assert_produces_warning(FutureWarning, match="using Rolling.[sum|mean]"): + result = r["A"].agg([np.sum, np.mean]).columns expected = Index(["sum", "mean"]) tm.assert_index_equal(result, expected) - result = r.agg({"A": [np.sum, np.mean]}).columns + with tm.assert_produces_warning(FutureWarning, match="using Rolling.[sum|mean]"): + result = r.agg({"A": [np.sum, np.mean]}).columns expected = MultiIndex.from_tuples([("A", "sum"), ("A", "mean")]) tm.assert_index_equal(result, expected)