# Pandas DataFrame apply() Examples

### 1. Applying a Function to DataFrame Elements

In [2]:
import pandas as pd

# Small two-column frame used as the input for this example.
df = pd.DataFrame({"A": [1, 2], "B": [10, 20]})


def square(x):
    """Return ``x`` multiplied by itself."""
    squared = x * x
    return squared


# apply() is called with its default axis here, so `square` is handed each
# column of the frame in turn.
df1 = df.apply(square)

# Show the input, one blank separator line, then the squared result.
print(df, end="\n\n")
print(df1)

   A   B
0  1  10
1  2  20

   A    B
0  1  100
1  4  400


### 2. apply() with lambda

In [4]:
# Square every value using an inline anonymous function instead of a named one.
df1 = df.apply(lambda column: column * column)
df1

Unnamed: 0,A,B
0,1,100
1,4,400


### 3. apply() along axis

In [9]:
import pandas as pd
import numpy as np

df = pd.DataFrame({"A": [1, 2], "B": [10, 20]})
print(df, end="\n\n")

# axis="index" is the same as axis=0: np.sum receives each column, so the
# result holds one total per column.
print("apply to sum values in columns")
df1 = df.apply(np.sum, axis="index")
print(df1)

# axis="columns" is the same as axis=1: np.sum receives each row, so the
# result holds one total per row.
print()
print("apply to sum values in rows")
df1 = df.apply(np.sum, axis="columns")
print(df1)

   A   B
0  1  10
1  2  20

apply to sum values in columns
A     3
B    30
dtype: int64

apply to sum values in rows
0    11
1    22
dtype: int64


### 4. DataFrame apply() with arguments

In [12]:
def sum(x, y, z):
    """Add the three arguments together.

    NOTE: this definition shadows Python's built-in ``sum`` in the
    namespace where it is executed.
    """
    partial = x + y
    return partial + z


df = pd.DataFrame({"A": [1, 2], "B": [10, 20]})
print(df, end="\n\n")

# apply() supplies the first argument (x): with the default axis that is each
# column of the frame. The `args` tuple fills the remaining positional
# parameters, so y=1 and z=2.
df1 = df.apply(sum, args=(1, 2))
print(df1)

   A   B
0  1  10
1  2  20

   A   B
0  4  13
1  5  23


### 5. DataFrame apply() with positional and keyword arguments

In [14]:
import pandas as pd


def sum(x, y, z, m):
    """Add ``x``, ``y`` and ``z``, then multiply the total by ``m``."""
    subtotal = x + y + z
    return subtotal * m


df = pd.DataFrame({"A": [1, 2], "B": [10, 20]})
print(df, end="\n\n")

# x comes from apply(); y and z are taken from `args`; m is forwarded as a
# keyword argument.
df1 = df.apply(sum, args=(1, 2), m=10)
print(df1)

   A   B
0  1  10
1  2  20

    A    B
0  40  130
1  50  230


In [24]:
import pandas as pd


def sum(x, y, z, m, l, k, w, s):
    """Scale ``x + y + z`` by ``m`` and divide by ``l + k + w + s``."""
    numerator = (x + y + z) * m
    denominator = l + k + w + s
    return numerator / denominator


df = pd.DataFrame({"A": [1, 2], "B": [10, 20]})
print(df, end="\n\n")

# x comes from apply(); y and z are taken from `args`; every other parameter
# is forwarded by keyword.
df1 = df.apply(sum, args=(1, 2), m=10, l=10, k=10, w=10, s=10)
print(df1)

   A   B
0  1  10
1  2  20

      A     B
0  1.00  3.25
1  1.25  5.75


# DataFrame applymap() function

If you want to apply a function element-wise, you can use the applymap() function (deprecated since pandas 2.1 in favor of DataFrame.map()). Unlike apply(), it does not take an `args` tuple of extra positional arguments. The function is applied to each element, and the returned values are used to create the resulting DataFrame object.

In [25]:
import pandas as pd
import math

df = pd.DataFrame({'A': [1, 4], 'B': [100, 400]})

# DataFrame.applymap was deprecated in pandas 2.1 and renamed DataFrame.map.
# Use the new name when this pandas provides it and fall back to applymap on
# older releases, so the cell runs without a FutureWarning on either.
# math.sqrt is called once per element and always returns a float.
df1 = (df.map if hasattr(df, "map") else df.applymap)(math.sqrt)

print(df)
print(df1)

   A    B
0  1  100
1  4  400
     A     B
0  1.0  10.0
1  2.0  20.0


# Use .apply with axis=1 to send every single row to a function

In [26]:
# Each dictionary describes one rectangle and becomes one row of the frame.
rectangles = [
    dict(height=40, width=10),
    dict(height=20, width=9),
    dict(height=3.4, width=4),
]

rectangles_df = pd.DataFrame(rectangles)
rectangles_df

Unnamed: 0,height,width
0,40.0,10
1,20.0,9
2,3.4,4


In [27]:
def calculate_area(row):
    """Return the rectangle area: the row's 'height' times its 'width'."""
    height = row['height']
    width = row['width']
    return height * width

In [28]:
# Evaluate calculate_area once per row (axis='columns' is the same as axis=1)
# and keep the results in a new 'area' column.
rectangles_df['area'] = rectangles_df.apply(calculate_area, axis='columns')
rectangles_df

Unnamed: 0,height,width,area
0,40.0,10,400.0
1,20.0,9,180.0
2,3.4,4,13.6


### Apply com índices relativos das linhas

In [121]:
# Rebuild the sample frame: one dictionary per rectangle, one row per dictionary.
rectangles = [
    dict(height=40, width=10),
    dict(height=20, width=9),
    dict(height=3.4, width=4),
]

rectangles_df = pd.DataFrame(data=rectangles)
rectangles_df

Unnamed: 0,height,width
0,40.0,10
1,20.0,9
2,3.4,4


In [122]:
def calculate_area(row):
    """Return the area of ``row`` and print the height of the row before it.

    The row's own index label is read from ``row.name`` instead of searching
    ``rectangles_df`` for the first row with an equal height; that search
    reported the wrong index whenever two rows shared a height.

    Args:
        row: one row of ``rectangles_df`` with 'height' and 'width' entries.

    Returns:
        ``row['height'] * row['width']``.
    """
    print("_"*100)
    indice = row.name
    print(f"indice: {indice}")
    print("Escolhendo o valor de um índice anterior")
    # Convert the label to a position so "previous row" is well defined.
    # Assumes the index labels of rectangles_df are unique.
    position = rectangles_df.index.get_loc(indice)
    if position > 0:
        # The first row has no predecessor, so nothing is printed for it
        # (previously a KeyError was raised and silently swallowed).
        print(rectangles_df["height"].iloc[position - 1])
    print("_"*100)
    return row['height'] * row['width']

In [123]:
# Run calculate_area for every row (axis='columns' is the same as axis=1) and
# store what it returns in the 'area' column.
rectangles_df['area'] = rectangles_df.apply(calculate_area, axis='columns')
rectangles_df

____________________________________________________________________________________________________
indice: 0
Escolhendo o valor de um índice anterior
____________________________________________________________________________________________________
____________________________________________________________________________________________________
indice: 1
Escolhendo o valor de um índice anterior
40.0
____________________________________________________________________________________________________
____________________________________________________________________________________________________
indice: 2
Escolhendo o valor de um índice anterior
20.0
____________________________________________________________________________________________________


Unnamed: 0,height,width,area
0,40.0,10,400.0
1,20.0,9,180.0
2,3.4,4,13.6


### Método II: Apply com índices relativos das linhas

In [148]:
# Integer-only sample data: one dictionary per rectangle, one row per dictionary.
rectangles = [
    dict(height=40, width=10),
    dict(height=20, width=20),
    dict(height=3, width=30),
]

rectangles_df = pd.DataFrame(data=rectangles)
rectangles_df

Unnamed: 0,height,width
0,40,10
1,20,20
2,3,30


In [157]:
def calculate_area(row):
    """Multiply this row's height by the width of the row positioned before it.

    The row's index label comes from the public ``row.name`` attribute and is
    used directly as a position, so this assumes ``rectangles_df`` has the
    default 0..n-1 integer index.

    Errors are no longer swallowed: previously a bare ``except: pass`` left
    ``area`` unassigned, so any failure surfaced as an unrelated
    UnboundLocalError at the ``return``. The original exception now
    propagates, and the closing banner is still printed.

    Args:
        row: one row of ``rectangles_df`` with a 'height' entry.

    Returns:
        ``row['height']`` times the 'width' at position ``row.name - 1``.
    """
    print("_"*50 + "start"+ "_"*50)
    try:
        print(f"row._name: {row.name}")
        print(f"row['height']: {row['height']}")
        # NOTE(review): for the row labelled 0 this is iloc[-1], i.e. the LAST
        # row's width (the recorded output shows 40 * 30 = 1200). Confirm the
        # wrap-around is intended.
        previous_width = rectangles_df['width'].iloc[row.name - 1]
        print(f"rectangles_df['width'].iloc[row._name-1]: {previous_width}")
        return row['height'] * previous_width
    finally:
        # Always close the banner, even when the lookup above fails.
        print("_"*50 + "end"+ "_"*50)

In [158]:
# Call calculate_area for each row (axis='columns' is the same as axis=1) and
# write the returned values into the 'area' column.
rectangles_df['area'] = rectangles_df.apply(calculate_area, axis='columns')
rectangles_df

__________________________________________________start__________________________________________________
row._name: 0
row['height']: 40
rectangles_df['width'].iloc[row._name-1]: 30
__________________________________________________end__________________________________________________
__________________________________________________start__________________________________________________
row._name: 1
row['height']: 20
rectangles_df['width'].iloc[row._name-1]: 10
__________________________________________________end__________________________________________________
__________________________________________________start__________________________________________________
row._name: 2
row['height']: 3
rectangles_df['width'].iloc[row._name-1]: 20
__________________________________________________end__________________________________________________


Unnamed: 0,height,width,area
0,40,10,1200
1,20,20,200
2,3,30,60
