- apply() is used to apply a function along an axis of a DataFrame or to the values of a Series.
- applymap() is used to apply a function to a DataFrame elementwise.
- map() is used to substitute each value in a Series with another value.

In [1]:
import pandas as pd

In [2]:
# Read the training CSV from the bit.ly short link (requires network access)
# and preview its first five rows.
train = pd.read_csv('http://bit.ly/kaggletrain')
train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [3]:
# Encode Sex as a number via a dict lookup: 'female' -> 0, 'male' -> 1.
# NOTE(review): 'Swx_num' looks like a typo for 'Sex_num'; left unchanged
# because the recorded output below shows this column name.
train['Swx_num'] = train.Sex.map({'female':0, 'male':1})

In [4]:
# Positional lookup: rows 0-3, column 4 (Sex) and column 12 (the new encoded column).
train.iloc[0:4, [4,12]]

Unnamed: 0,Sex,Swx_num
0,male,1
1,female,0
2,female,0
3,female,0


In [5]:
# Series.apply calls len() on every name, giving its character count.
train['Name_length'] = train.Name.apply(len)

In [6]:
# Label-based slice: unlike iloc, loc[0:4] includes the end label, so five rows are shown.
train.loc[0:4, ['Name', 'Name_length']]

Unnamed: 0,Name,Name_length
0,"Braund, Mr. Owen Harris",23
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",51
2,"Heikkinen, Miss. Laina",22
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",44
4,"Allen, Mr. William Henry",24


In [7]:
import numpy as np

In [8]:
# Round every fare up to the next whole number with NumPy's ceil (values stay floats).
train['Fare_ceil'] = train.Fare.apply(np.ceil)

In [9]:
train.loc[0:4, ['Fare', 'Fare_ceil']]

Unnamed: 0,Fare,Fare_ceil
0,7.25,8.0
1,71.2833,72.0
2,7.925,8.0
3,53.1,54.0
4,8.05,9.0


In [10]:
# Split each name at the comma into a [text before, text after] list;
# the second item keeps its leading space.
train.Name.str.split(',').head()

0                           [Braund,  Mr. Owen Harris]
1    [Cumings,  Mrs. John Bradley (Florence Briggs ...
2                            [Heikkinen,  Miss. Laina]
3      [Futrelle,  Mrs. Jacques Heath (Lily May Peel)]
4                          [Allen,  Mr. William Henry]
Name: Name, dtype: object

In [11]:
def get_element(my_list, position):
    """Return the item stored at index ``position`` of ``my_list``.

    Works with anything that supports ``[]`` lookup; any error raised by the
    lookup is passed through unchanged.
    """
    selected = my_list[position]
    return selected

In [12]:
# Extra keyword arguments given to apply() are forwarded to the function, so
# each list is passed to get_element(..., position=0), returning its first item.
train.Name.str.split(',').apply(get_element, position=0).head()

0       Braund
1      Cumings
2    Heikkinen
3     Futrelle
4        Allen
Name: Name, dtype: object

In [13]:
# Same result as the previous cell, using an inline lambda instead of a named
# helper. head() is called once, after apply(), mirroring the previous cell.
train.Name.str.split(',').apply(lambda x: x[0]).head()

0       Braund
1      Cumings
2    Heikkinen
3     Futrelle
4        Allen
Name: Name, dtype: object

In [14]:
# Read the drinks-by-country CSV from the bit.ly short link (requires network
# access) and preview its first five rows.
drinks = pd.read_csv('http://bit.ly/drinksbycountry')
drinks.head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,Asia
1,Albania,89,132,54,4.9,Europe
2,Algeria,25,0,14,0.7,Africa
3,Andorra,245,138,312,12.4,Europe
4,Angola,217,57,45,5.9,Africa


In [15]:
# axis=0 hands each column to the built-in max(), yielding one maximum per column.
drinks.loc[:, 'beer_servings':'wine_servings'].apply(max, axis=0)

beer_servings      376
spirit_servings    438
wine_servings      370
dtype: int64

In [16]:
# axis=1 hands each row to np.argmax, which reports the position (0, 1 or 2)
# of the largest of the three serving columns rather than its column name.
drinks.loc[:, 'beer_servings':'wine_servings'].apply(np.argmax, axis=1)

0      0
1      1
2      0
3      2
4      0
      ..
188    0
189    0
190    0
191    0
192    0
Length: 193, dtype: int64

In [17]:
# applymap() calls float() on every individual cell of the selected columns.
# NOTE(review): newer pandas releases deprecate DataFrame.applymap in favour of
# DataFrame.map - confirm against the installed version.
drinks.loc[:, 'beer_servings':'wine_servings'].applymap(float)

Unnamed: 0,beer_servings,spirit_servings,wine_servings
0,0.0,0.0,0.0
1,89.0,132.0,54.0
2,25.0,0.0,14.0
3,245.0,138.0,312.0
4,217.0,57.0,45.0
...,...,...,...
188,333.0,100.0,3.0
189,111.0,2.0,1.0
190,6.0,0.0,0.0
191,32.0,19.0,4.0


In [18]:
# NOTE(review): this cell repeats the previous cell verbatim; it looks like a leftover duplicate.
drinks.loc[:, 'beer_servings':'wine_servings'].applymap(float)

Unnamed: 0,beer_servings,spirit_servings,wine_servings
0,0.0,0.0,0.0
1,89.0,132.0,54.0
2,25.0,0.0,14.0
3,245.0,138.0,312.0
4,217.0,57.0,45.0
...,...,...,...
188,333.0,100.0,3.0
189,111.0,2.0,1.0
190,6.0,0.0,0.0
191,32.0,19.0,4.0


# How to use apply()?

In [19]:
import pandas as pd
# Demo frame: integer columns A, B and C over four labelled rows.
df = pd.DataFrame(
    data={
        'A': [1, 2, 3, 4],
        'B': [10, 20, 30, 40],
        'C': [20, 40, 60, 80],
    },
    index=['Row 1', 'Row 2', 'Row 3', 'Row 4'],
)

In [20]:
def custom_sum(row):
    """Return the total of ``row`` as computed by its own ``sum()`` method.

    Intended for use with ``DataFrame.apply``, which passes one row or one
    column at a time depending on ``axis``.
    """
    total = row.sum()
    return total

# axis=1 passes each row to custom_sum, so D = A + B + C for that row.
df['D'] = df.apply(custom_sum, axis=1)

In [21]:
df.head()

Unnamed: 0,A,B,C,D
Row 1,1,10,20,31
Row 2,2,20,40,62
Row 3,3,30,60,93
Row 4,4,40,80,124


In [22]:
# Fifth row: axis=0 passes each column to custom_sum, and the column totals
# are stored under the label 'Row 5'.
df.loc['Row 5'] = df.apply(custom_sum, axis=0)

In [23]:
df.head()

Unnamed: 0,A,B,C,D
Row 1,1,10,20,31
Row 2,2,20,40,62
Row 3,3,30,60,93
Row 4,4,40,80,124
Row 5,10,100,200,310


In [24]:
def multiply_by_2(val):
    """Return ``val`` multiplied by two (``val * 2``)."""
    doubled = val * 2
    return doubled
df['E'] = df['C'].apply(multiply_by_2) # no axis argument is needed because apply here runs on a single Series

In [25]:
df.head()

Unnamed: 0,A,B,C,D,E
Row 1,1,10,20,31,40
Row 2,2,20,40,62,80
Row 3,3,30,60,93,120
Row 4,4,40,80,124,160
Row 5,10,100,200,310,400


## Use lambda with apply

In [26]:
# Row-wise total of every column present so far (A through E), so the derived
# columns D and E are counted as well.
df['F'] = df.apply(lambda x:x.sum(), axis=1)

In [27]:
df.head()

Unnamed: 0,A,B,C,D,E,F
Row 1,1,10,20,31,40,102
Row 2,2,20,40,62,80,204
Row 3,3,30,60,93,120,306
Row 4,4,40,80,124,160,408
Row 5,10,100,200,310,400,1020


In [28]:
# Column-wise totals over all existing rows, including the 'Row 5' totals row.
# NOTE(review): the label 'Row6' has no space, unlike 'Row 1' ... 'Row 5'.
df.loc['Row6'] = df.apply(lambda x:x.sum(), axis=0)

In [29]:
# Element-wise doubling of column C with an inline lambda.
df['G'] = df['C'].apply(lambda x:x*2)

In [30]:
# head() shows only the first five rows, so the sixth row ('Row6') is not displayed here.
df.head()

Unnamed: 0,A,B,C,D,E,F,G
Row 1,1,10,20,31,40,102,40
Row 2,2,20,40,62,80,204,80
Row 3,3,30,60,93,120,306,120
Row 4,4,40,80,124,160,408,160
Row 5,10,100,200,310,400,1020,400


## With result_type parameter

In [31]:
# result_type='broadcast' keeps the original shape: each row's sum is repeated
# in every column of that row. The result is only displayed, not stored.
df.apply(custom_sum, axis=1, result_type='broadcast')

Unnamed: 0,A,B,C,D,E,F,G
Row 1,244,244,244,244,244,244,244
Row 2,488,488,488,488,488,488,488
Row 3,732,732,732,732,732,732,732
Row 4,976,976,976,976,976,976,976
Row 5,2440,2440,2440,2440,2440,2440,2440
Row6,4880,4880,4880,4880,4880,4880,4880


In [32]:
def cal_multi_col(row):
    """Return a two-item list: ``row['A']`` doubled and ``row['B']`` tripled."""
    doubled_a = row['A'] * 2
    tripled_b = row['B'] * 3
    return [doubled_a, tripled_b]

In [33]:
# result_type='expand' turns each returned list into separate columns, labelled 0 and 1.
df.apply(cal_multi_col, axis=1, result_type='expand')

Unnamed: 0,0,1
Row 1,2,30
Row 2,4,60
Row 3,6,90
Row 4,8,120
Row 5,20,300
Row6,40,600


In [34]:
# Keep the expanded result and attach its columns (labelled 0 and 1) to df.
res = df.apply(cal_multi_col, axis=1, result_type='expand')
df[res.columns] = res

In [35]:
df

Unnamed: 0,A,B,C,D,E,F,G,0,1
Row 1,1,10,20,31,40,102,40,2,30
Row 2,2,20,40,62,80,204,80,4,60
Row 3,3,30,60,93,120,306,120,6,90
Row 4,4,40,80,124,160,408,160,8,120
Row 5,10,100,200,310,400,1020,400,20,300
Row6,20,200,400,620,800,2040,800,40,600


In [36]:
# result_type='reduce' keeps each returned list as a single value, so 'New'
# holds one two-item list per row.
df['New'] = df.apply(cal_multi_col, axis=1, result_type='reduce')

In [37]:
df

Unnamed: 0,A,B,C,D,E,F,G,0,1,New
Row 1,1,10,20,31,40,102,40,2,30,"[2, 30]"
Row 2,2,20,40,62,80,204,80,4,60,"[4, 60]"
Row 3,3,30,60,93,120,306,120,6,90,"[6, 90]"
Row 4,4,40,80,124,160,408,160,8,120,"[8, 120]"
Row 5,10,100,200,310,400,1020,400,20,300,"[20, 300]"
Row6,20,200,400,620,800,2040,800,40,600,"[40, 600]"


# How to use applymap()?

In [38]:
# applymap() squares every cell; for the list-valued 'New' column, np.square
# squares the items inside each list.
df.applymap(np.square)

Unnamed: 0,A,B,C,D,E,F,G,0,1,New
Row 1,1,100,400,961,1600,10404,1600,4,900,"[4, 900]"
Row 2,4,400,1600,3844,6400,41616,6400,16,3600,"[16, 3600]"
Row 3,9,900,3600,8649,14400,93636,14400,36,8100,"[36, 8100]"
Row 4,16,1600,6400,15376,25600,166464,25600,64,14400,"[64, 14400]"
Row 5,100,10000,40000,96100,160000,1040400,160000,400,90000,"[400, 90000]"
Row6,400,40000,160000,384400,640000,4161600,640000,1600,360000,"[1600, 360000]"


# How to use map()?

In [39]:
# Example Series with one missing value (NaN), used by the map() cells below.
s = pd.Series(['cat', 'dog', np.nan, 'rabbit'])
s

0       cat
1       dog
2       NaN
3    rabbit
dtype: object

In [40]:
# Dict lookup: a value without a matching key ('rabbit') becomes NaN, and NaN stays NaN.
s.map({'cat': 'kitten', 'dog': 'puppy'})

0    kitten
1     puppy
2       NaN
3       NaN
dtype: object

In [41]:
# A callable is applied to every value, including NaN, which is formatted as 'nan'.
s.map('I am a {}'.format)

0       I am a cat
1       I am a dog
2       I am a nan
3    I am a rabbit
dtype: object

In [42]:
# Identity function: every value, NaN included, is returned unchanged.
s.map(lambda x : x)

0       cat
1       dog
2       NaN
3    rabbit
dtype: object

In [43]:
# na_action='ignore' skips NaN entries, leaving them as NaN instead of formatting them.
s.map('I am a {}'.format, na_action='ignore')

0       I am a cat
1       I am a dog
2              NaN
3    I am a rabbit
dtype: object