# Ch05 - Modifying data with functions (Simple)

In [4]:
import pandas as pd
import numpy as np

In [5]:
# Draw from a dedicated, seeded generator so that "Restart & Run All" rebuilds
# exactly the same synthetic columns every time.
rng = np.random.RandomState(42)
# Two columns of 366 samples each from a normal distribution (mean 100, sd 5).
data = {'Open': rng.normal(100, 5, 366),
        'Close': rng.normal(100, 5, 366)}

In [6]:
# Each dict key becomes a column; each array supplies that column's values.
df = pd.DataFrame.from_dict(data)

In [8]:
# Preview the first five rows of the frame.
df.head(n=5)

Unnamed: 0,Close,Open
0,104.392516,106.02953
1,97.335476,96.547402
2,101.226359,102.022804
3,99.953867,102.783661
4,106.126685,102.484453


In [9]:
# axis=1 hands each row to np.mean, producing one mean per row.
row_means = df.apply(np.mean, axis=1)
row_means.head(3)

0    105.211023
1     96.941439
2    101.624581
dtype: float64

In [10]:
# A lambda is the usual way to pass a short, throwaway row function.
# NOTE: this result is not displayed, because the expression is not the last
# statement of its cell.
df.apply(lambda row: row['Open'] - row['Close'], axis=1).head(3)
# A named function reads better than a lambda once the row logic grows.
def percent_change(x):
    """Return the Open-minus-Close difference of a row as a fraction of Open.

    Parameters
    ----------
    x : mapping-like
        Anything indexable by the keys 'Open' and 'Close', e.g. one row
        handed over by ``DataFrame.apply(..., axis=1)``.

    Returns
    -------
    ``(Open - Close) / Open``. The value is a fraction (it is not multiplied
    by 100) and is positive when Close is below Open.

    NOTE(review): confirm that this sign convention is the intended one.
    """
    open_value = x['Open']
    close_value = x['Close']
    return (open_value - close_value) / open_value

In [11]:
# Run the named function on every row (axis=1) and preview three results.
row_changes = df.apply(percent_change, axis=1)
row_changes.head(3)

0    0.015439
1   -0.008163
2    0.007807
dtype: float64

In [12]:
# Switch the axis: axis=0 (the default) passes each column to the function
# instead of each row, giving one mean per column.
column_means = df.apply(np.mean, axis=0)
column_means

Close     99.986877
Open     100.103914
dtype: float64

In [13]:
def greater_than_x(element, x):
    """Report whether ``element`` is strictly greater than the threshold ``x``.

    Parameters
    ----------
    element : value to test.
    x : threshold that ``element`` is compared against.

    Returns
    -------
    The result of ``element > x``.
    """
    exceeds_threshold = element > x
    return exceeds_threshold

In [14]:
# Extra positional arguments for the applied function travel in `args`;
# here every Open value is compared against x=100.
df['Open'].apply(greater_than_x, args=(100,)).head(3)

0     True
1    False
2     True
Name: Open, dtype: bool

In [15]:
# This can be used in conjunction with boolean subsetting: the resulting
# True/False Series acts as a row mask.
mask = df['Open'].apply(greater_than_x, args=(100,))

In [16]:
# Keep only the Open values whose mask entry is True, then preview them.
df.loc[mask, 'Open'].head()

0    106.029530
2    102.022804
3    102.783661
4    102.484453
9    101.685135
Name: Open, dtype: float64

In [18]:
# Apply a function over a sliding window of 5 consecutive Close values.
# The top-level pd.rolling_apply helper is deprecated; the equivalent
# Series.rolling(...).apply(...) form is used instead.
# The first window - 1 results are NaN because their window is incomplete.
df['Close'].rolling(window=5).apply(np.mean).head()

	Series.rolling(center=False,window=5).apply(kwargs=<dict>,func=<function>,args=<tuple>)
  if __name__ == '__main__':


0           NaN
1           NaN
2           NaN
3           NaN
4    101.806981
Name: Close, dtype: float64

In [19]:
# There are actually several built-in rolling functions, e.g. the rolling
# correlation between two Series over a 5-value window.
# The top-level pd.rolling_corr helper is deprecated; the equivalent
# Series.rolling(...).corr(other) form is used instead.
df['Close'].rolling(window=5).corr(df['Open']).head()

	Series.rolling(window=5).corr(other=<Series>)
  from ipykernel import kernelapp as app


0         NaN
1         NaN
2         NaN
3         NaN
4    0.738859
dtype: float64