# Pandas: apply

# Example 1: sales 

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the sales table (read_table expects tab-separated text) and use the Year column as the row index.
sales_url = 'https://github.com/prasertcbs/tutorial/raw/master/sales.tsv'
df = pd.read_table(sales_url, index_col='Year')
df

Unnamed: 0_level_0,TV,Camera,Phone
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2011,556300,943620,518600
2012,344200,583370,323920
2013,727320,548640,941500
2014,462870,661520,650570
2015,916600,974180,148740
2016,176830,588990,910570
2017,745550,896860,982820


In [3]:
# With no axis argument, apply() works column by column: np.sum receives each
# column and the result holds one total per column (TV, Camera, Phone).
df.apply(np.sum)

TV        3929670
Camera    5197180
Phone     4476720
dtype: int64

In [4]:
# axis=0 written out explicitly; the per-column totals are the same as when
# the axis argument is omitted.
df.apply(np.sum, axis=0)

TV        3929670
Camera    5197180
Phone     4476720
dtype: int64

In [5]:
# axis='index' is the string spelling of axis=0: still one total per column.
df.apply(np.sum, axis='index')

TV        3929670
Camera    5197180
Phone     4476720
dtype: int64

In [6]:
# The built-in DataFrame.sum() yields the same per-column totals as apply(np.sum).
df.sum()

TV        3929670
Camera    5197180
Phone     4476720
dtype: int64

In [7]:
# axis=1 hands each row to np.sum instead, giving one total per Year
# across the TV, Camera and Phone columns.
df.apply(np.sum, axis=1)

Year
2011    2018520
2012    1251490
2013    2217460
2014    1774960
2015    2039520
2016    1676390
2017    2625230
dtype: int64

In [8]:
# axis='columns' is the string spelling of axis=1: still one total per Year.
df.apply(np.sum, axis='columns')

Year
2011    2018520
2012    1251490
2013    2217460
2014    1774960
2015    2039520
2016    1676390
2017    2625230
dtype: int64

In [9]:
# Built-in equivalent of the row-wise apply: sum across the columns of each row.
df.sum(axis=1)

Year
2011    2018520
2012    1251490
2013    2217460
2014    1774960
2015    2039520
2016    1676390
2017    2625230
dtype: int64

In [10]:
from scipy import stats

In [11]:
# apply() is not limited to NumPy functions: here SciPy's harmonic mean is
# computed once per column.
df.apply(stats.hmean)

TV        428773.063662
Camera    703686.417471
Phone     425440.529110
dtype: float64

In [12]:
# Same function applied row-wise (axis=1): the harmonic mean of the three
# product columns for each Year.
df.apply(stats.hmean, axis=1)

Year
2011    626879.558570
2012    389274.318996
2013    704269.269335
2014    575902.233035
2015    339342.184959
2016    354979.850179
2017    863614.238880
dtype: float64

# Example 2: mpg

In [13]:
# Switch to the mpg dataset. NOTE: this rebinds `df`, so the sales table from
# Example 1 is no longer reachable under that name.
mpg_url = 'https://github.com/prasertcbs/tutorial/raw/master/mpg.csv'
df = pd.read_csv(mpg_url)
# Preview the first ten rows only.
df.head(10)

Unnamed: 0,manufacturer,model,displ,year,cyl,trans,drv,cty,hwy,fl,class
0,audi,a4,1.8,1999,4,auto(l5),f,18,29,p,compact
1,audi,a4,1.8,1999,4,manual(m5),f,21,29,p,compact
2,audi,a4,2.0,2008,4,manual(m6),f,20,31,p,compact
3,audi,a4,2.0,2008,4,auto(av),f,21,30,p,compact
4,audi,a4,2.8,1999,6,auto(l5),f,16,26,p,compact
5,audi,a4,2.8,1999,6,manual(m5),f,18,26,p,compact
6,audi,a4,3.1,2008,6,auto(av),f,18,27,p,compact
7,audi,a4 quattro,1.8,1999,4,manual(m5),4,18,26,p,compact
8,audi,a4 quattro,1.8,1999,4,auto(l5),4,16,25,p,compact
9,audi,a4 quattro,2.0,2008,4,manual(m6),4,20,28,p,compact


In [14]:
# On a single column, Series.apply calls str.upper on every manufacturer name.
df['manufacturer'].apply(str.upper)

0            AUDI
1            AUDI
2            AUDI
3            AUDI
4            AUDI
          ...    
229    VOLKSWAGEN
230    VOLKSWAGEN
231    VOLKSWAGEN
232    VOLKSWAGEN
233    VOLKSWAGEN
Name: manufacturer, Length: 234, dtype: object

In [15]:
# Series.map with a function gives the same upper-cased column as Series.apply.
df['manufacturer'].map(str.upper)

0            AUDI
1            AUDI
2            AUDI
3            AUDI
4            AUDI
          ...    
229    VOLKSWAGEN
230    VOLKSWAGEN
231    VOLKSWAGEN
232    VOLKSWAGEN
233    VOLKSWAGEN
Name: manufacturer, Length: 234, dtype: object

In [16]:
# Upper-case several text columns at once: DataFrame.apply passes each selected
# column (a Series) to the lambda, which uses the vectorised .str accessor.
text_cols = ['manufacturer', 'model', 'class']
df[text_cols].apply(lambda col: col.str.upper())

Unnamed: 0,manufacturer,model,class
0,AUDI,A4,COMPACT
1,AUDI,A4,COMPACT
2,AUDI,A4,COMPACT
3,AUDI,A4,COMPACT
4,AUDI,A4,COMPACT
...,...,...,...
229,VOLKSWAGEN,PASSAT,MIDSIZE
230,VOLKSWAGEN,PASSAT,MIDSIZE
231,VOLKSWAGEN,PASSAT,MIDSIZE
232,VOLKSWAGEN,PASSAT,MIDSIZE


In [17]:
def describe_car(row):
    """Build the label '<manufacturer> (<model> <trans>)' from one row of the frame."""
    return f"{row['manufacturer']} ({row['model']} {row['trans']})"
# axis=1 so the function receives one row at a time and can combine several columns.
df.apply(describe_car, axis=1)

0                  audi (a4 auto(l5))
1                audi (a4 manual(m5))
2                audi (a4 manual(m6))
3                  audi (a4 auto(av))
4                  audi (a4 auto(l5))
                    ...              
229      volkswagen (passat auto(s6))
230    volkswagen (passat manual(m6))
231      volkswagen (passat auto(l5))
232    volkswagen (passat manual(m5))
233      volkswagen (passat auto(s6))
Length: 234, dtype: object

In [18]:
# Display the mpg frame as it stands; pandas abbreviates the middle rows in the output.
df

Unnamed: 0,manufacturer,model,displ,year,cyl,trans,drv,cty,hwy,fl,class
0,audi,a4,1.8,1999,4,auto(l5),f,18,29,p,compact
1,audi,a4,1.8,1999,4,manual(m5),f,21,29,p,compact
2,audi,a4,2.0,2008,4,manual(m6),f,20,31,p,compact
3,audi,a4,2.0,2008,4,auto(av),f,21,30,p,compact
4,audi,a4,2.8,1999,6,auto(l5),f,16,26,p,compact
...,...,...,...,...,...,...,...,...,...,...,...
229,volkswagen,passat,2.0,2008,4,auto(s6),f,19,28,p,midsize
230,volkswagen,passat,2.0,2008,4,manual(m6),f,21,29,p,midsize
231,volkswagen,passat,2.8,1999,6,auto(l5),f,16,26,p,midsize
232,volkswagen,passat,2.8,1999,6,manual(m5),f,18,26,p,midsize


In [19]:
# Factor used to fill the 'kml' column. It equals the miles-per-US-gallon to
# km-per-litre conversion (presumably cty/hwy are in mpg; confirm against the data source).
MPG_TO_KML = 0.425143707
def format_kml(row):
    """Return 'city/highway' for one row, each value scaled by MPG_TO_KML.

    Each number is formatted as ':4.1f' (one decimal, padded to width 4),
    so the result is a string, not a number.
    """
    return f'{row.cty * MPG_TO_KML:4.1f}/{row.hwy * MPG_TO_KML:4.1f}'
# Row-wise apply (axis=1) so each call sees cty and hwy of the same row.
df['kml'] = df.apply(format_kml, axis=1)
df

Unnamed: 0,manufacturer,model,displ,year,cyl,trans,drv,cty,hwy,fl,class,kml
0,audi,a4,1.8,1999,4,auto(l5),f,18,29,p,compact,7.7/12.3
1,audi,a4,1.8,1999,4,manual(m5),f,21,29,p,compact,8.9/12.3
2,audi,a4,2.0,2008,4,manual(m6),f,20,31,p,compact,8.5/13.2
3,audi,a4,2.0,2008,4,auto(av),f,21,30,p,compact,8.9/12.8
4,audi,a4,2.8,1999,6,auto(l5),f,16,26,p,compact,6.8/11.1
...,...,...,...,...,...,...,...,...,...,...,...,...
229,volkswagen,passat,2.0,2008,4,auto(s6),f,19,28,p,midsize,8.1/11.9
230,volkswagen,passat,2.0,2008,4,manual(m6),f,21,29,p,midsize,8.9/12.3
231,volkswagen,passat,2.8,1999,6,auto(l5),f,16,26,p,midsize,6.8/11.1
232,volkswagen,passat,2.8,1999,6,manual(m5),f,18,26,p,midsize,7.7/11.1


In [20]:
# Average only the numeric columns. numeric_only=True is required because the
# frame also holds text columns (manufacturer, model, trans, ...): since
# pandas 2.0 these are no longer dropped silently and a bare df.mean() raises
# TypeError instead.
df.mean(numeric_only=True)

displ       3.471795
year     2003.500000
cyl         5.888889
cty        16.858974
hwy        23.440171
dtype: float64

In [21]:
# Harmonic mean of every numeric column except 'year'.
# select_dtypes keeps only the numeric columns, so text columns never reach stats.hmean.
numeric_cols = df.select_dtypes(include=[np.number])
numeric_cols.drop(columns='year').apply(stats.hmean)

displ     3.014710
cyl       5.446082
cty      15.848635
hwy      21.914100
dtype: float64