In [4]:
# import necessary modules
import numpy as np
import pandas as pd

* table-wise function application: pipe()
* row- or column-wise function application: apply()


## Table Function Application
DataFrames and Series can be passed into functions without any problems

In [1]:
def extract_city_name(df):
    """
    Derive a ``city_name`` column from the ``city_and_code`` column.

    Each ``city_and_code`` value is split on "," and the first piece is kept,
    e.g. "Chicago, IL" -> "Chicago".

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a string column named ``city_and_code``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the additional ``city_name`` column. ``df`` itself
        is not modified, so the function can be re-run and chained with
        ``pipe()`` without changing its input.
    """
    city_name = df['city_and_code'].str.split(",").str.get(0)
    # assign() builds a new frame instead of writing into the caller's one
    return df.assign(city_name=city_name)

In [2]:
# function to add country name

def add_country_name(df, country_name=None):
    """
    Add a ``city_and_country`` column: ``city_name`` followed by ``country_name``.

    The two strings are joined without a separator, e.g. city "Chicago" with
    country_name "US" -> "ChicagoUS".

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a string column named ``city_name``.
    country_name : str, optional
        Text appended to every city name. With the default ``None`` nothing
        is appended and ``city_and_country`` equals ``city_name``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the additional ``city_and_country`` column. ``df``
        itself is not modified.
    """
    col = 'city_name'
    # None cannot be concatenated to strings, so the default means "no suffix"
    suffix = '' if country_name is None else country_name
    # assign() builds a new frame instead of writing into the caller's one
    return df.assign(city_and_country=df[col] + suffix)

In [5]:
# one-row test frame holding a single "city, state-code" string

df_p = pd.DataFrame(data=[['Chicago, IL']], columns=['city_and_code'])

In [6]:
# call the extract_city_name() and add_country_name() helpers as plain nested
# calls: the inner call runs first and its result feeds the outer one

add_country_name(
    extract_city_name(df_p),
    country_name='US',
)

Unnamed: 0,city_and_code,city_name,city_and_country
0,"Chicago, IL",Chicago,ChicagoUS


In [7]:
# pipe() lets our own functions sit in a method chain alongside pandas methods;
# the steps now read top to bottom in the order they run
(
    df_p
    .pipe(extract_city_name)
    .pipe(add_country_name, country_name="US")
)

Unnamed: 0,city_and_code,city_name,city_and_country
0,"Chicago, IL",Chicago,ChicagoUS


## Row or Column-wise Function Application

In [8]:
# Seeded generator so the demo frame is identical on every Restart & Run All.
# The three Series have different indexes on purpose: pandas aligns them on
# the union a-d and fills the gaps ('one' at d, 'three' at a) with NaN.
rng = np.random.default_rng(0)
df = pd.DataFrame({
    'one': pd.Series(rng.standard_normal(3), index=['a', 'b', 'c']),
    'two': pd.Series(rng.standard_normal(4), index=['a', 'b', 'c', 'd']),
    'three': pd.Series(rng.standard_normal(3), index=['b', 'c', 'd']),
})

In [9]:
# pre-built numpy function, applied to each column (axis=0 is the default)
df.apply(np.mean, axis=0)

one      0.244973
two      0.141112
three    0.115705
dtype: float64

In [10]:
# axis='columns' (same as axis=1) hands each row to the function instead
df.apply(np.mean, axis='columns')

a    0.091453
b   -0.351936
c    0.295545
d    0.816375
dtype: float64

In [11]:
# our own lambda: the spread (max minus min) of every column
df.apply(lambda column: column.max() - column.min())

one      1.837462
two      2.031592
three    1.821197
dtype: float64

In [13]:
# pre-built numpy function that returns a value per element, so the result
# is a DataFrame of the same shape rather than one number per column
df.apply(np.cumsum, axis=0)

Unnamed: 0,one,two,three
a,-0.925151,1.108057,
b,-0.012841,0.184522,-1.044584
c,0.73492,-0.29169,-0.429498
d,,0.564446,0.347115


In [14]:
# pre-built element-wise numpy function, applied column by column
df.apply(np.exp, axis=0)

Unnamed: 0,one,two,three
a,0.396472,3.028469,
b,2.490069,0.397113,0.351838
c,2.112265,0.621132,1.849816
d,,2.354047,2.174096


In [15]:
# apply() also accepts functions you write yourself:
def own_function(x):
    """Return ``x`` multiplied by itself (element-wise when ``x`` is a Series)."""
    squared = x * x
    return squared

# each column is passed to own_function in turn
df.apply(own_function, axis=0)

Unnamed: 0,one,two,three
a,0.855905,1.227791,
b,0.83231,0.852917,1.091156
c,0.559146,0.226778,0.378331
d,,0.732969,0.603128


In [16]:
# functions that take extra arguments also work: apply() forwards them for you
def subtract_and_divide(x, sub, divide=1):
    """Subtract ``sub`` from ``x``, then divide the difference by ``divide``."""
    difference = x - sub
    return difference / divide

# sub travels positionally through args, divide as a keyword that apply() forwards
df.apply(subtract_and_divide, args=(5,), divide=3)


Unnamed: 0,one,two,three
a,-1.97505,-1.297314,
b,-1.362563,-1.974512,-2.014861
c,-1.417413,-1.825404,-1.461638
d,,-1.381288,-1.407796


In [17]:
# args is unpacked as an iterable, so it must be a tuple even for a single extra argument
def subtract(x, sub):
    """Return ``x`` with ``sub`` subtracted from it."""
    difference = x - sub
    return difference

# (5,) is a one-element tuple: the trailing comma, not the parentheses, makes it a tuple
df.apply(subtract, args =(5,))

Unnamed: 0,one,two,three
a,-5.925151,-3.891943,
b,-4.08769,-5.923535,-6.044584
c,-4.252239,-5.476212,-4.384914
d,,-4.143864,-4.223387
