[Reference](https://towardsdatascience.com/4-must-know-pandas-function-application-852f5c4b4a1e)

# 1. pipe()

In [2]:
import pandas as pd
import seaborn as sns
#Load seaborn's 'mpg' example dataset into a DataFrame
mpg = sns.load_dataset('mpg')
#Method Chaining: info() is called directly on the frame returned by head()
mpg.head().info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   mpg           5 non-null      float64
 1   cylinders     5 non-null      int64  
 2   displacement  5 non-null      float64
 3   horsepower    5 non-null      float64
 4   weight        5 non-null      int64  
 5   acceleration  5 non-null      float64
 6   model_year    5 non-null      int64  
 7   origin        5 non-null      object 
 8   name          5 non-null      object 
dtypes: float64(4), int64(3), object(2)
memory usage: 488.0+ bytes


In [3]:
#Function to extract the car first name and create a new column called car_first_name
def extract_car_first_name(df):
    """Add a ``car_first_name`` column holding the first word of ``name``.

    The ``name`` column is split on single spaces and the first piece of
    each value is kept. The column is assigned on ``df`` itself (the frame
    is modified in place) and the same object is returned.
    """
    name_pieces = df['name'].str.split(' ')
    df['car_first_name'] = name_pieces.str[0]
    return df

#Function to add my_name after the car_first_name and create a new column called car_and_name
def add_car_my_name(df, my_name = None):
    """Append ``my_name`` to each ``car_first_name`` and store it as ``car_and_name``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that already contains a ``car_first_name`` column. The new
        column is assigned on this object, so it is modified in place.
    my_name : str, optional
        Text appended to every ``car_first_name`` value. ``None`` (the
        default) is treated as an empty suffix.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object. Returning it lets the function be used as a
        step in a ``DataFrame.pipe`` chain, which hands the return value of
        one step to the next.
    """
    # A None suffix cannot be concatenated to strings; fall back to ''.
    suffix = '' if my_name is None else my_name
    df['car_and_name'] = df['car_first_name'] + suffix
    return df
    
#Nested-call form: extract_car_first_name runs first and its return value is passed to add_car_my_name
add_car_my_name(extract_car_first_name(mpg), my_name = 'Cornellius')

#Both functions assign their new column on the frame they receive, so mpg itself now carries them
mpg.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name,car_first_name,car_and_name
0,18.0,8,307.0,130.0,3504,12.0,70,usa,chevrolet chevelle malibu,chevrolet,chevroletCornellius
1,15.0,8,350.0,165.0,3693,11.5,70,usa,buick skylark 320,buick,buickCornellius
2,18.0,8,318.0,150.0,3436,11.0,70,usa,plymouth satellite,plymouth,plymouthCornellius
3,16.0,8,304.0,150.0,3433,12.0,70,usa,amc rebel sst,amc,amcCornellius
4,17.0,8,302.0,140.0,3449,10.5,70,usa,ford torino,ford,fordCornellius


In [4]:
#Same two steps written as a pipe chain: each pipe call passes the object it is called on as the first argument of the given function
mpg.pipe(extract_car_first_name).pipe(add_car_my_name, my_name = 'Cornellius')

# 2. apply()

In [5]:
import numpy as np
#Select only the numerical columns, then apply np.mean to each of them (one result per column)
mpg.select_dtypes('number').apply(np.mean)

mpg               23.514573
cylinders          5.454774
displacement     193.425879
horsepower       104.469388
weight          2970.424623
acceleration      15.568090
model_year        76.010050
dtype: float64

In [6]:
#Create a function that accepts a column and returns that column's mean divided by 2

def mean_divided_by_2(col):
    """Return half of the mean of ``col``.

    ``col`` only needs to provide a ``mean()`` method.
    """
    column_mean = col.mean()
    return column_mean / 2
    
#Apply the custom function to every numeric column; each column is passed in as col
mpg.select_dtypes('number').apply(mean_divided_by_2)

mpg               11.757286
cylinders          2.727387
displacement      96.712940
horsepower        52.234694
weight          1485.212312
acceleration       7.784045
model_year        38.005025
dtype: float64

In [7]:
#Use the print function and set the axis parameter to 1 for a row-wise application.
#print returns None, so besides printing every row the expression evaluates to a Series of None (one entry per row)
mpg.select_dtypes('number').apply(print, axis =1)

mpg               18.0
cylinders          8.0
displacement     307.0
horsepower       130.0
weight          3504.0
acceleration      12.0
model_year        70.0
Name: 0, dtype: float64
mpg               15.0
cylinders          8.0
displacement     350.0
horsepower       165.0
weight          3693.0
acceleration      11.5
model_year        70.0
Name: 1, dtype: float64
mpg               18.0
cylinders          8.0
displacement     318.0
horsepower       150.0
weight          3436.0
acceleration      11.0
model_year        70.0
Name: 2, dtype: float64
mpg               16.0
cylinders          8.0
displacement     304.0
horsepower       150.0
weight          3433.0
acceleration      12.0
model_year        70.0
Name: 3, dtype: float64
mpg               17.0
cylinders          8.0
displacement     302.0
horsepower       140.0
weight          3449.0
acceleration      10.5
model_year        70.0
Name: 4, dtype: float64
mpg               15.0
cylinders          8.0
displacement     429.0
horsep

0      None
1      None
2      None
3      None
4      None
       ... 
393    None
394    None
395    None
396    None
397    None
Length: 398, dtype: object

In [8]:
#Creating a classification function. If the mpg is below 18 and the model_year below 75 it would return Old-School, else it would return New-School

def mpg_classification(cols):
    """Label one row as 'Old-School' or 'New-School'.

    A row is 'Old-School' only when its ``mpg`` is below 18 and its
    ``model_year`` is below 75; every other row is 'New-School'.
    ``cols`` is indexed by the keys 'mpg' and 'model_year'.
    """
    # `and` short-circuits: model_year is only looked up when mpg < 18.
    is_old_school = cols['mpg'] < 18 and cols['model_year'] < 75
    return 'Old-School' if is_old_school else 'New-School'
        
#Create a new column called Classification by passing mpg_classification to apply with axis = 1, so the function is called once per row

mpg['Classification'] = mpg.apply(mpg_classification, axis = 1)
        
mpg.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name,car_first_name,car_and_name,Classification
0,18.0,8,307.0,130.0,3504,12.0,70,usa,chevrolet chevelle malibu,chevrolet,chevroletCornellius,New-School
1,15.0,8,350.0,165.0,3693,11.5,70,usa,buick skylark 320,buick,buickCornellius,Old-School
2,18.0,8,318.0,150.0,3436,11.0,70,usa,plymouth satellite,plymouth,plymouthCornellius,New-School
3,16.0,8,304.0,150.0,3433,12.0,70,usa,amc rebel sst,amc,amcCornellius,Old-School
4,17.0,8,302.0,140.0,3449,10.5,70,usa,ford torino,ford,fordCornellius,Old-School


# 3. agg()

In [9]:
#agg with a function. Only the numeric columns are aggregated: a mean is undefined for the text columns (origin, name, ...),
#and recent pandas versions raise on them instead of silently dropping them
mpg.select_dtypes('number').agg(np.mean)

  return getattr(obj, f)()


mpg               23.514573
cylinders          5.454774
displacement     193.425879
horsepower       104.469388
weight          2970.424623
acceleration      15.568090
model_year        76.010050
dtype: float64

In [10]:
#Instead of a function, agg can also receive the name of a basic statistic as a string. Additionally, we can pass our own function here as well. If we have multiple functions, we put them in a list.
#Only the numeric columns are aggregated, because mean and std are undefined for the text columns.
mpg.select_dtypes('number').agg(['mean', 'std',mean_divided_by_2])

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year
mean,23.514573,5.454774,193.425879,104.469388,2970.424623,15.56809,76.01005
std,7.815984,1.701004,104.269838,38.49116,846.841774,2.757689,3.697627
mean_divided_by_2,11.757286,2.727387,96.71294,52.234694,1485.212312,7.784045,38.005025


In [11]:
#For example, I only use the mpg and acceleration columns. If we pass a dictionary to agg, it maps each column to the function (or list of functions) to apply to that column.
#Function/column combinations that were not requested are left empty in the result.

mpg[['mpg', 'acceleration']].agg({'mpg': ['mean',mean_divided_by_2], 'acceleration': 'std'})

Unnamed: 0,mpg,acceleration
mean,23.514573,
mean_divided_by_2,11.757286,
std,,2.757689


# 4. applymap()

In [12]:
#applymap calls the function on every single value of the DataFrame. You could input a lambda function as well. Here the lambda transforms each value into a string and returns the length of that string.
#NOTE(review): newer pandas releases deprecate DataFrame.applymap in favour of DataFrame.map - confirm the installed version before renaming
mpg.applymap(lambda x: len(str(x)))

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name,car_first_name,car_and_name,Classification
0,4,1,5,5,4,4,2,3,25,9,19,10
1,4,1,5,5,4,4,2,3,17,5,15,10
2,4,1,5,5,4,4,2,3,18,8,18,10
3,4,1,5,5,4,4,2,3,13,3,13,10
4,4,1,5,5,4,4,2,3,11,4,14,10
...,...,...,...,...,...,...,...,...,...,...,...,...
393,4,1,5,4,4,4,2,3,15,4,14,10
394,4,1,4,4,4,4,2,6,9,2,12,10
395,4,1,5,4,4,4,2,3,13,5,15,10
396,4,1,5,4,4,4,2,3,11,4,14,10


In [13]:
#A single Series has no applymap; here apply runs the function on every element of the column (Series.map accepts the same function)
mpg['name'].apply(lambda x: len(str(x)))

0      25
1      17
2      18
3      13
4      11
       ..
393    15
394     9
395    13
396    11
397    10
Name: name, Length: 398, dtype: int64