In [1]:
import pandas as pd

In [11]:
# Build the example dataframe in one step from a column-name -> values mapping
df = pd.DataFrame({
    'name': ['Lauren', 'Glory', 'Mark'],
    'gender': ['Female', 'Female', 'Male'],
    'age': [21, 20, 19],
})

# View dataframe
df

Unnamed: 0,name,gender,age
0,Lauren,Female,21
1,Glory,Female,20
2,Mark,Male,19


### Create Functions to Process Data

In [12]:
# Create a function that groups the data by a column and returns the mean age per group
def mean_age_by_group(dataframe, col):
    """Group ``dataframe`` by ``col`` and average the numeric columns per group.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame to aggregate.
    col : str
        Name of the column to group by; its values become the result's index.

    Returns
    -------
    pandas.DataFrame
        One row per group holding the mean of every numeric column
        (for the example frame in this notebook that is just ``age``).
    """
    # numeric_only=True skips string columns such as 'name'. Without it,
    # pandas >= 2.0 raises TypeError instead of silently dropping them.
    return dataframe.groupby(col).mean(numeric_only=True)

In [13]:
# Create a function that capitalizes all the column headers and returns them
def uppercase_column_name(dataframe):
    """Return ``dataframe`` with every column header upper-cased.

    The input frame is left untouched, so the function can be used as a
    ``.pipe`` step without silently renaming the caller's columns.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame whose column labels are strings.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same data and upper-cased column labels.
    """
    # set_axis returns a relabelled frame instead of reassigning
    # ``dataframe.columns`` on the object the caller passed in.
    return dataframe.set_axis(dataframe.columns.str.upper(), axis=1)

### Create A Pipeline Of Those Functions

In [20]:
# Chain the two steps: aggregate by gender first, then upper-case the headers
(
    df
    .pipe(mean_age_by_group, col='gender')
    .pipe(uppercase_column_name)
)

Unnamed: 0_level_0,AGE
gender,Unnamed: 1_level_1
Female,20.5
Male,19.0


#### Since we used a group by, the gender column was collapsed into the index and the name column was removed to accommodate this. We can save this change permanently by assigning the result to a dataframe.

In [78]:
# Display the original frame; the pipeline result above was not assigned back to df.
# NOTE(review): the stored output below already shows is_Male / is_Female columns,
# which are only created by find_gender in later cells — this cell was presumably
# executed out of order; re-run with Restart & Run All to confirm.
df

Unnamed: 0,name,gender,age,is_Male,is_Female
0,Lauren,Female,21,False,True
1,Glory,Female,20,False,True
2,Mark,Male,19,True,False


### We can add as many functions as we want to this pipeline

In [79]:
# Create a function that returns a dataframe with a new column whose value is based on the gender value
def find_gender(dataframe, gender):
    """Flag the rows of ``dataframe`` whose ``'gender'`` value equals ``gender``.

    A boolean column named ``'is_' + gender`` is added to ``dataframe`` itself
    (the frame is modified in place) and that same frame is returned.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame containing a ``'gender'`` column.
    gender : str
        Value to compare against; also used as the suffix of the new column name.

    Returns
    -------
    pandas.DataFrame
        ``dataframe``, now carrying the extra ``'is_<gender>'`` column.
    """
    # Operate on the argument, not on the notebook-level ``df`` variable, so the
    # function works for any frame it is given.
    dataframe['is_' + gender] = (dataframe['gender'] == gender)
    return dataframe

In [80]:
# Add the is_Male flag column by running find_gender as a pipeline step
df = df.pipe(find_gender, gender='Male')

In [81]:
# Create a function that finds the max value in a column and returns the corresponding row/record
def find_max(dataframe, col):
    """Return the row of ``dataframe`` that holds the largest value in ``col``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame to search.
    col : str
        Name of the column whose maximum is looked up.

    Returns
    -------
    pandas.Series
        The row selected by the index label that ``idxmax`` reports. If that
        label is duplicated in the index, ``.loc`` returns every matching row
        (a DataFrame) instead.
    """
    # Look the row up on the argument, not on the notebook-level ``df`` variable.
    return dataframe.loc[dataframe[col].idxmax()]

In [82]:
# Fetch the record of the oldest person by running find_max as a pipeline step
record = df.pipe(find_max, col='age')

In [83]:
# Display the row returned by find_max for the 'age' column
record

name         Lauren
gender       Female
age              21
is_Male       False
is_Female      True
Name: 0, dtype: object

#### Note that idxmax returns index labels. So if the DataFrame has duplicates in the index, the label may not uniquely identify the row, and df.loc may return more than one row.
#### Therefore, if df does not have a unique index, you must make the index unique before proceeding as above. Depending on the DataFrame, you can sometimes use stack or set_index to make the index unique. Or you can simply reset the index (so the rows are renumbered, starting at 0):
-- df = df.reset_index()