# Demo Pipeline 1

## First Example: chain functions

This notebook shows how to implement a simple pipeline.

In [1]:
import pandas as pd

In [2]:
# Build the demo frame in one step: two label columns and two numeric columns.
df = pd.DataFrame({
    'some_class': ['A', 'B', 'C'],
    'some_group': ['y', 'y', 'n'],
    'some_number': [32, 29, 19],
    'some_number2': [1, 2, 3],
})
# Display the frame
df

Unnamed: 0,some_class,some_group,some_number,some_number2
0,A,y,32,1
1,B,y,29,2
2,C,n,19,3


In [3]:
# Group the rows by one column and average every numeric column per group.
def mean_age_by_group(dataframe, col):
  """Return the per-group mean of the numeric columns of `dataframe`.

  Parameters
  ----------
  dataframe : pandas.DataFrame
      Frame to aggregate.
  col : label or list of labels
      Column(s) to group by; they become the index of the result.

  Returns
  -------
  pandas.DataFrame
      One row per group, one column per numeric input column.
  """
  # numeric_only=True: text columns cannot be averaged. Older pandas dropped
  # them silently, pandas 2.0+ raises TypeError unless this is requested.
  return dataframe.groupby(col).mean(numeric_only=True)

In [4]:
def uppercase_column_name(dataframe):
  """Return a copy of `dataframe` whose column labels are upper-cased.

  Parameters
  ----------
  dataframe : pandas.DataFrame
      Frame with string column labels.

  Returns
  -------
  pandas.DataFrame
      New frame with the same data and upper-cased column labels; the
      frame passed in keeps its original labels.
  """
  # Work on a copy: assigning to `.columns` on the argument itself would
  # rename the caller's frame as a hidden side effect of this pipeline step.
  renamed = dataframe.copy()
  renamed.columns = renamed.columns.str.upper()
  return renamed

In [5]:
# Chain the two steps: average per group, then upper-case the column labels.
df.pipe(mean_age_by_group, col='some_group').pipe(uppercase_column_name)

Unnamed: 0_level_0,SOME_NUMBER,SOME_NUMBER2
some_group,Unnamed: 1_level_1,Unnamed: 2_level_1
n,19.0,3.0
y,30.5,1.5


## Second Example: composite functions

In [6]:
# Re-display the frame that the composite-function examples below start from.
df

Unnamed: 0,some_class,some_group,some_number,some_number2
0,A,y,32,1
1,B,y,29,2
2,C,n,19,3


Define the functions `h`, `g`, and `f` that the pipelines below compose.

In [27]:
# Fixed-factor step: doubles whatever it receives.
def h(df):
  """Print the marker "H" and return `df` multiplied by 2."""
  print("H")
  doubled = df * 2
  return doubled
# Single-argument step: scales the input by a caller-supplied factor.
def g(x, arg1):
  """Print the marker "g" and return `x` multiplied by `arg1`."""
  print("g")
  scaled = x * arg1
  return scaled
# Multi-argument step: scales `df_alias` by the difference arg3 - arg1.
def f(arg1, df_alias, arg3):
  """Print the marker "F" and return `df_alias * (arg3 - arg1)`.

  The data is the second parameter, so a pipeline has to route it by
  keyword, e.g. `.pipe((f, 'df_alias'), arg1=..., arg3=...)`.
  """
  print("F")
  factor = arg3 - arg1
  return df_alias * factor

In [28]:
#####################################
# f(df_alias=df, arg1=4, arg3=7)
#####################################
# The (callable, keyword) tuple tells pipe to pass the frame as `df_alias`.
df.pipe((f, 'df_alias'), arg1=4, arg3=7)

F


Unnamed: 0,some_class,some_group,some_number,some_number2
0,AAA,yyy,96,3
1,BBB,yyy,87,6
2,CCC,nnn,57,9


In [9]:
#####################################
# f(df_alias=g(df, arg1=3), arg1=5, arg3=7)
#####################################
# f takes (arg1, df_alias, arg3) and has no `arg2` parameter, so the piped
# frame must be routed to the `df_alias` keyword; 'arg2' raises TypeError.
(df
    .pipe(g, arg1=3)
    .pipe((f, 'df_alias'), arg1=5, arg3=7)
 )

g
F


Unnamed: 0,some_class,some_group,some_number,some_number2
0,AAAAAA,yyyyyy,192,6
1,BBBBBB,yyyyyy,174,12
2,CCCCCC,nnnnnn,114,18


In [10]:
#####################################
# f(df_alias=g(h(df), arg1=3), arg1=5, arg3=7)
#####################################
# f takes (arg1, df_alias, arg3) and has no `arg2` parameter, so the piped
# frame must be routed to the `df_alias` keyword; 'arg2' raises TypeError.
(df
    .pipe(h)
    .pipe(g, arg1=3)
    .pipe((f, 'df_alias'), arg1=5, arg3=7)
 )

H
g
F


Unnamed: 0,some_class,some_group,some_number,some_number2
0,AAAAAAAAAAAA,yyyyyyyyyyyy,384,12
1,BBBBBBBBBBBB,yyyyyyyyyyyy,348,24
2,CCCCCCCCCCCC,nnnnnnnnnnnn,228,36
