In [1]:
import numpy as np

import pandas as pd

In [2]:
# Shared sample objects used by the examples in this notebook.

# Eight consecutive daily timestamps starting on 2000-01-01.
index = pd.date_range("1/1/2000", periods=8)

# Five random values labelled "a" through "e".
s = pd.Series(np.random.randn(5), index=["a", "b", "c", "d", "e"])

# 8x3 frame of random values indexed by the dates above. It gets its own name
# because the next statement binds `df`; sharing that name would make this
# frame unreachable the moment the cell finishes running.
df_dates = pd.DataFrame(np.random.randn(8, 3), index=index, columns=["A", "B", "C"])

# Three columns built from Series that carry different labels. The resulting
# frame is indexed by the union of those labels ("a".."d"); positions a column
# has no value for are left as NaN.
df = pd.DataFrame(
    {
        "one": pd.Series(np.random.randn(3), index=["a", "b", "c"]),
        "two": pd.Series(np.random.randn(4), index=["a", "b", "c", "d"]),
        "three": pd.Series(np.random.randn(3), index=["b", "c", "d"]),
    }
)

In [6]:
# Three columns built from Series that carry different labels. The resulting
# frame is indexed by the union of those labels ("a".."d"); positions a column
# has no value for are left as NaN.
df = pd.DataFrame(
    {
        "one": pd.Series(np.random.randn(3), index=["a", "b", "c"]),
        "two": pd.Series(np.random.randn(4), index=["a", "b", "c", "d"]),
        "three": pd.Series(np.random.randn(3), index=["b", "c", "d"]),
    }
)

# Every result below is bound to a name and printed. A bare expression in the
# middle of a cell is evaluated and then discarded (only the final expression
# of a cell is echoed), so otherwise only the last example would be shown.
# The numbers in the sample outputs come from one particular random draw and
# will differ on every run.

# Mean of each column (apply hands one column at a time to the function).
column_means = df.apply(lambda x: np.mean(x))
print(column_means)
# one      0.811094
# two      1.360588
# three    0.187958
# dtype: float64

# Mean of each row (axis=1 hands one row at a time to the function).
row_means = df.apply(lambda x: np.mean(x), axis=1)
print(row_means)
# a    1.583749
# b    0.734929
# c    1.133683
# d   -0.166914
# dtype: float64

# Range (max minus min) of each column.
column_ranges = df.apply(lambda x: x.max() - x.min())
print(column_ranges)
# one      1.051928
# two      1.632779
# three    1.840607
# dtype: float64

# Running total down each column; the result has the same shape as df.
running_totals = df.apply(np.cumsum)
print(running_totals)
#         one       two     three
# a  1.394981  1.772517       NaN
# b  1.738035  3.684640 -0.050390
# c  2.433281  5.163008  1.177045
# d       NaN  5.442353  0.563873

# Element-wise exponential; the result has the same shape as df.
exponentials = df.apply(np.exp)
print(exponentials)
#         one       two     three
# a  4.034899  5.885648       NaN
# b  1.409244  6.767440  0.950858
# c  2.004201  4.385785  3.412466
# d       NaN  1.322262  0.541630

   Salary  Others
0    8000  1000.0
1    9500     NaN
2    5000  2000.0
    Salary   Others
0  5892.48   736.56
1  6997.32      NaN
2  3682.80  1473.12


In [7]:
# A 3x2 frame whose rows are all [4, 9], so each result is easy to check by eye.
df = pd.DataFrame([[4, 9] for _ in range(3)], columns=list("AB"))
print(df)
#    A  B
# 0  4  9
# 1  4  9
# 2  4  9

# Applying a NumPy universal function (in this case the same as np.sqrt(df)):
print(df.apply(np.sqrt))
#      A    B
# 0  2.0  3.0
# 1  2.0  3.0
# 2  2.0  3.0

# A reducing function can be applied along either axis.
# Down the index: one total per column.
print(df.apply(np.sum, axis="index"))
# A    12
# B    27
# dtype: int64

# Across the columns: one total per row.
print(df.apply(np.sum, axis="columns"))
# 0    13
# 1    13
# 2    13
# dtype: int64

   A  B
0  4  9
1  4  9
2  4  9
     A    B
0  2.0  3.0
1  2.0  3.0
2  2.0  3.0
A    12
B    27
dtype: int64
0    13
1    13
2    13
dtype: int64


In [None]:
# NOTE: this cell uses the `df` left in the namespace by an earlier cell; the
# sample outputs below correspond to a 3-row frame with columns 'A' and 'B'.
#
# Each result is bound to a name and printed. A bare expression in the middle
# of a cell is evaluated and then discarded (only the final expression of a
# cell is echoed), so otherwise only the last example would be shown.

# Passing result_type='expand' will expand list-like results into the columns
# of a DataFrame.
expanded = df.apply(lambda x: [1, 2], axis=1, result_type='expand')
print(expanded)
#    0  1
# 0  1  2
# 1  1  2
# 2  1  2

# Returning a Series from the function is similar to passing
# result_type='expand'. The resulting column names will be the Series index.
labelled = df.apply(lambda x: pd.Series([1, 2], index=['foo', 'bar']), axis=1)
print(labelled)
#    foo  bar
# 0    1    2
# 1    1    2
# 2    1    2

# Passing result_type='broadcast' will ensure the same shape result, whether a
# list-like or a scalar is returned by the function, and broadcast it along the
# axis. The resulting column names will be the originals.
broadcasted = df.apply(lambda x: [1, 2], axis=1, result_type='broadcast')
print(broadcasted)
#    A  B
# 0  1  2
# 1  1  2
# 2  1  2

In [None]:
# Three numeric rows plus one row that is entirely NaN.
df = pd.DataFrame([[1, 2, 3],
                   [4, 5, 6],
                   [7, 8, 9],
                   [np.nan, np.nan, np.nan]],
                  columns=['A', 'B', 'C'])

# Each result is bound to a name and printed. A bare expression in the middle
# of a cell is evaluated and then discarded (only the final expression of a
# cell is echoed), so otherwise only the last example would be shown.

# Aggregate these functions over the rows.
agg_sum_min = df.agg(['sum', 'min'])
print(agg_sum_min)
#         A     B     C
# sum  12.0  15.0  18.0
# min   1.0   2.0   3.0

# Different aggregations per column; a function that was not requested for a
# column shows up as NaN in that column.
agg_per_column = df.agg({'A' : ['sum', 'min'], 'B' : ['min', 'max']})
print(agg_per_column)
#         A    B
# sum  12.0  NaN
# min   1.0  2.0
# max   NaN  8.0

# Aggregate different functions over the columns and rename the index of the
# resulting DataFrame (each keyword becomes a row label).
agg_named = df.agg(x=('A', 'max'), y=('B', 'min'), z=('C', 'mean'))
print(agg_named)
#      A    B    C
# x  7.0  NaN  NaN
# y  NaN  2.0  NaN
# z  NaN  NaN  6.0

# Aggregate over the columns (one value per row).
agg_row_mean = df.agg("mean", axis="columns")
print(agg_row_mean)
# 0    2.0
# 1    5.0
# 2    8.0
# 3    NaN
# dtype: float64