# Techniques for Method Chaining

Chapter 12.3 from **Python for Data Analysis** by Wes McKinney.

In [1]:
import numpy as np; import pandas as pd

In [None]:
# Step-by-step version that relies on temporary variables.
# NOTE(review): load_data is a placeholder for whatever produces the DataFrame;
# it is not defined in the visible cells (pandas itself has no `pd.load_data`).
df = load_data()
# .copy() gives an independent frame, so the column assignment below does not
# write to a slice of df (avoids SettingWithCopyWarning).
df2 = df[df['col2'] < 0].copy()
df2['col1_demeaned'] = df2['col1'] - df2['col1'].mean()
# Per-key standard deviation of the demeaned column created above.
result = df2.groupby('key').col1_demeaned.std()

In [None]:
# The same computation written with method chaining (no temporary variables).
# NOTE(review): load_data is a placeholder that is not defined in the visible cells.
result = (load_data()
          [lambda x: x.col2 < 0]  # callable indexer: filters the frame produced so far
          .assign(col1_demeaned=lambda x: x.col1 - x.col1.mean())
          .groupby('key')
          .col1_demeaned.std())

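Chaining avoids creating named intermediate variables such as `df2`, which keeps the namespace tidy and can let temporary objects be freed sooner. It is not guaranteed to compute faster, however, and a long chain can be harder to read and debug than the step-by-step version.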

---

### The pipe Method

In [None]:
# Consider a sequence of function calls in which each step receives the
# previous step's result as its first positional argument.
# NOTE(review): f, g, h, df and v1..v4 are not defined in the visible cells;
# presumably they are placeholders for illustration.
a = f(df, arg1=v1)
b = g(a, v2, arg3=v3)
c = h(b, arg4=v4)

In [None]:
# When the functions accept and return Series or DataFrame objects, the same
# sequence can be written as a chain of pipe calls: obj.pipe(func, *args, **kwargs)
# calls func(obj, *args, **kwargs).
# NOTE(review): f, g, h, df and v1..v4 are placeholders not defined in the visible cells.
result = (df.pipe(f, arg1=v1)
          .pipe(g, v2, arg3=v3)
          .pipe(h, arg4=v4))

In [None]:
# f(df) and df.pipe(f) are equivalent, but the second makes chaining easier

---

In [None]:
# Subtract the (key1, key2) group mean from col1.
g = df.groupby(['key1', 'key2'])
# Select col1 on the groupby first: transform('mean') on the whole groupby
# returns a DataFrame of every non-key column, which cannot be subtracted from
# the single Series df['col1'] to produce a column.
df['col1'] = df['col1'] - g['col1'].transform('mean')

In [None]:
def group_demean(df, by, cols):
    """Return a copy of ``df`` with each column in ``cols`` demeaned per group.

    Parameters
    ----------
    df : DataFrame to demean; it is not modified.
    by : key(s) passed to ``df.groupby``.
    cols : iterable of column labels to demean.

    Returns
    -------
    A copy of ``df`` in which every listed column has had the mean of its
    ``by`` group subtracted; all other columns are unchanged.
    """
    grouped = df.groupby(by)
    demeaned = df.copy()
    for col in cols:
        # transform('mean') broadcasts each group's mean back onto the row index
        group_means = grouped[col].transform('mean')
        demeaned[col] = df[col] - group_means
    return demeaned

# Keep only the rows with negative col1, then demean col1 within each
# (key1, key2) group by handing the filtered frame to group_demean via pipe.
result = (
    df
    .loc[lambda frame: frame.col1 < 0]
    .pipe(group_demean, by=['key1', 'key2'], cols=['col1'])
)

---