# Apply Function

In [7]:
import pandas as pd
import numpy as np
import warnings as wr
wr.filterwarnings('ignore')

In [8]:
# Load NYC_Jobs.csv (path is relative to the current working directory)
# and preview the first rows.
data = pd.read_csv('NYC_Jobs.csv')
data.head()

Unnamed: 0,Job ID,Agency,Posting Type,Civil Service Title,Job Category,Salary Range From,Salary Range To,Process Date
0,424339,DEPT OF HEALTH/MENTAL HYGIENE,External,PUBLIC HEALTH NURSE,Health,84252.0,84252.0,04-12-22
1,379094,NYC EMPLOYEES RETIREMENT SYS,External,CERT IT DEVELOPER (APP),"Technology, Data & Innovation",82884.0,116391.0,04-12-22
2,520417,NYC HOUSING AUTHORITY,External,EXECUTIVE AGENCY COUNSEL,Legal Affairs,105000.0,125000.0,04-12-22
3,233549,NYC EMPLOYEES RETIREMENT SYS,External,CERTIFIED IT ADMINISTRATOR (LA,Information Technology & Telecommunications,87203.0,131623.0,04-12-22
4,510256,HUMAN RIGHTS COMMISSION,External,ASSOCIATE HUMAN RIGHTS SPECIAL,Constituent Services & Community Programs,58449.0,67216.0,04-12-22


In [9]:
# Independent copy of the freshly loaded frame.
# NOTE(review): `df` is not referenced again in the visible cells, while later
# cells modify `data` itself — confirm which frame is meant to stay pristine.
df = data.copy()

### Test 1.0: Apply - Square Root

In [10]:
# 1.0 Single brackets select one column as a Series, so apply(np.sqrt)
# returns a Series of square roots.
data['Salary Range From'].apply(np.sqrt)

0       290.261951
1       287.895814
2       324.037035
3       295.301541
4       241.762280
           ...    
3768    292.183162
3769    273.221522
3770    316.227766
3771    205.640463
3772    249.793915
Name: Salary Range From, Length: 3773, dtype: float64

In [11]:
# 1.2 Double brackets select a one-column DataFrame, so the result of
# apply(np.sqrt) is a DataFrame as well.
data[['Salary Range From']].apply(np.sqrt)

Unnamed: 0,Salary Range From
0,290.261951
1,287.895814
2,324.037035
3,295.301541
4,241.762280
...,...
3768,292.183162
3769,273.221522
3770,316.227766
3771,205.640463


In [12]:
# Test 1.3: selecting with a list of column names yields a DataFrame object.
type(data[['Salary Range From', 'Salary Range To']])

pandas.core.frame.DataFrame

In [13]:
# Square root of both salary columns; the result is again a DataFrame with
# the same two columns.
data[['Salary Range From', 'Salary Range To']].apply(np.sqrt)

Unnamed: 0,Salary Range From,Salary Range To
0,290.261951,290.261951
1,287.895814,341.161252
2,324.037035,353.553391
3,295.301541,362.798842
4,241.762280,259.260487
...,...,...
3768,292.183162,346.241245
3769,273.221522,292.996587
3770,316.227766,339.116499
3771,205.640463,243.721152


### Take Away:

1. The pandas `apply` function can be used on either a Series object or a DataFrame object. A numeric function such as `np.sqrt` cannot be applied to the whole dataset, because some of its columns contain strings.

### Test 2.0 - Find the Average Salary for Each Job

1. `apply` can work on each row or on each column; by default it is applied to columns.

In [14]:
# Test 2.1: with no axis argument the function is applied to each column,
# giving one mean per column.
data[['Salary Range From', 'Salary Range To']].apply(np.mean)  # axis defaults to 0

Salary Range From    57657.939466
Salary Range To      79660.249595
dtype: float64

In [15]:
# Same computation as the previous cell, with the default axis written out.
data[['Salary Range From', 'Salary Range To']].apply(np.mean, axis=0) # axis=0: one result per column

Salary Range From    57657.939466
Salary Range To      79660.249595
dtype: float64

In [16]:
# Row-wise average: axis=1 passes each row to np.mean, giving the midpoint
# of the two salary bounds for every row.
data[['Salary Range From', 'Salary Range To']].apply(np.mean, axis=1) # axis=1: one result per row

0        84252.0
1        99637.5
2       115000.0
3       109413.0
4        62832.5
          ...   
3768    102627.0
3769     80248.5
3770    107500.0
3771     50844.0
3772     67077.0
Length: 3773, dtype: float64

In [17]:
# Check the type of the row-wise result: it is a Series (one value per row).
type(data[['Salary Range From', 'Salary Range To']].apply(np.mean, axis=1))

pandas.core.series.Series

1. `apply` does not affect the original DataFrame, but its output can be added to the DataFrame, as the following test shows:

In [18]:
# Test 2.2: adding output in original df
data['Salary Avg.'] = data[['Salary Range From', 'Salary Range To']].apply(np.mean, axis=1)
data

Unnamed: 0,Job ID,Agency,Posting Type,Civil Service Title,Job Category,Salary Range From,Salary Range To,Process Date,Salary Avg.
0,424339,DEPT OF HEALTH/MENTAL HYGIENE,External,PUBLIC HEALTH NURSE,Health,84252.0,84252.0,04-12-22,84252.0
1,379094,NYC EMPLOYEES RETIREMENT SYS,External,CERT IT DEVELOPER (APP),"Technology, Data & Innovation",82884.0,116391.0,04-12-22,99637.5
2,520417,NYC HOUSING AUTHORITY,External,EXECUTIVE AGENCY COUNSEL,Legal Affairs,105000.0,125000.0,04-12-22,115000.0
3,233549,NYC EMPLOYEES RETIREMENT SYS,External,CERTIFIED IT ADMINISTRATOR (LA,Information Technology & Telecommunications,87203.0,131623.0,04-12-22,109413.0
4,510256,HUMAN RIGHTS COMMISSION,External,ASSOCIATE HUMAN RIGHTS SPECIAL,Constituent Services & Community Programs,58449.0,67216.0,04-12-22,62832.5
...,...,...,...,...,...,...,...,...,...
3768,457839,NYC EMPLOYEES RETIREMENT SYS,Internal,CERTIFIED IT ADMINISTRATOR (LA,"Technology, Data & Innovation",85371.0,119883.0,04-12-22,102627.0
3769,527141,HOUSING PRESERVATION & DVLPMNT,Internal,ASSOCIATE HOUSING DEVELOPMENT,"Engineering, Architecture, & Planning",74650.0,85847.0,04-12-22,80248.5
3770,509443,FINANCIAL INFO SVCS AGENCY,Internal,SENIOR IT ARCHITECT,"Technology, Data & Innovation",100000.0,115000.0,04-12-22,107500.0
3771,352367,DEPT OF HEALTH/MENTAL HYGIENE,Internal,ECONOMIST,"Finance, Accounting, & Procurement",42288.0,59400.0,04-12-22,50844.0


### 3.0 Customized Functions

- Case 3.1: We want the civil service titles in capitalized form (only the first letter is upper case, all other letters are lower case)


In [19]:
def capitalize_position(title):
    """Return ``title`` with only its first letter in upper case.

    Parameters
    ----------
    title : str
        A single title, e.g. ``'PUBLIC HEALTH NURSE'``.

    Returns
    -------
    str
        The title lower-cased and then capitalized, e.g.
        ``'Public health nurse'``. Non-string values (such as the float NaN
        pandas uses for missing cells, or ``None``) are returned unchanged
        instead of raising ``AttributeError``.
    """
    # Missing cells reach the function as float NaN / None, which have no
    # string methods; pass them through so Series.apply does not crash.
    if not isinstance(title, str):
        return title
    return title.lower().capitalize()
# Apply the function to every title. This only returns a new Series; the
# column stored in `data` is not modified by this call.
data['Civil Service Title'].apply(capitalize_position)

0                  Public health nurse
1              Cert it developer (app)
2             Executive agency counsel
3       Certified it administrator (la
4       Associate human rights special
                     ...              
3768    Certified it administrator (la
3769     Associate housing development
3770               Senior it architect
3771                         Economist
3772           Agency attorney interne
Name: Civil Service Title, Length: 3773, dtype: object

In [20]:
# Look at the frame again: nothing changed, because apply returned a new
# Series. The result has to be assigned back to the column to keep it.
data.head()

Unnamed: 0,Job ID,Agency,Posting Type,Civil Service Title,Job Category,Salary Range From,Salary Range To,Process Date,Salary Avg.
0,424339,DEPT OF HEALTH/MENTAL HYGIENE,External,PUBLIC HEALTH NURSE,Health,84252.0,84252.0,04-12-22,84252.0
1,379094,NYC EMPLOYEES RETIREMENT SYS,External,CERT IT DEVELOPER (APP),"Technology, Data & Innovation",82884.0,116391.0,04-12-22,99637.5
2,520417,NYC HOUSING AUTHORITY,External,EXECUTIVE AGENCY COUNSEL,Legal Affairs,105000.0,125000.0,04-12-22,115000.0
3,233549,NYC EMPLOYEES RETIREMENT SYS,External,CERTIFIED IT ADMINISTRATOR (LA,Information Technology & Telecommunications,87203.0,131623.0,04-12-22,109413.0
4,510256,HUMAN RIGHTS COMMISSION,External,ASSOCIATE HUMAN RIGHTS SPECIAL,Constituent Services & Community Programs,58449.0,67216.0,04-12-22,62832.5


In [21]:
def capitalize_position(title):
    """Return ``title`` with only its first letter in upper case.

    Parameters
    ----------
    title : str
        A single title, e.g. ``'EXECUTIVE AGENCY COUNSEL'``.

    Returns
    -------
    str
        The title lower-cased and then capitalized, e.g.
        ``'Executive agency counsel'``. Non-string values (such as the float
        NaN pandas uses for missing cells, or ``None``) are returned
        unchanged instead of raising ``AttributeError``.
    """
    # Guard against missing cells: float NaN / None have no .lower().
    if not isinstance(title, str):
        return title
    return title.lower().capitalize()
# Assign the result back so the capitalized titles replace the column in `data`.
data['Civil Service Title'] = data['Civil Service Title'].apply(capitalize_position)

In [22]:
# Confirm that 'Civil Service Title' now holds the capitalized titles.
data.head()

Unnamed: 0,Job ID,Agency,Posting Type,Civil Service Title,Job Category,Salary Range From,Salary Range To,Process Date,Salary Avg.
0,424339,DEPT OF HEALTH/MENTAL HYGIENE,External,Public health nurse,Health,84252.0,84252.0,04-12-22,84252.0
1,379094,NYC EMPLOYEES RETIREMENT SYS,External,Cert it developer (app),"Technology, Data & Innovation",82884.0,116391.0,04-12-22,99637.5
2,520417,NYC HOUSING AUTHORITY,External,Executive agency counsel,Legal Affairs,105000.0,125000.0,04-12-22,115000.0
3,233549,NYC EMPLOYEES RETIREMENT SYS,External,Certified it administrator (la,Information Technology & Telecommunications,87203.0,131623.0,04-12-22,109413.0
4,510256,HUMAN RIGHTS COMMISSION,External,Associate human rights special,Constituent Services & Community Programs,58449.0,67216.0,04-12-22,62832.5


- Alternate by Lambda Function

In [23]:
# `x` is a single entry of the column; it plays the same role as the `title`
# argument of the named function used above.
data['Agency'].apply(lambda x: x.lower().capitalize())

0        Dept of health/mental hygiene
1         Nyc employees retirement sys
2                Nyc housing authority
3         Nyc employees retirement sys
4              Human rights commission
                     ...              
3768      Nyc employees retirement sys
3769    Housing preservation & dvlpmnt
3770        Financial info svcs agency
3771     Dept of health/mental hygiene
3772           Human rights commission
Name: Agency, Length: 3773, dtype: object

### 4.0 Make a list out of every row: all values from Job ID through the last column collected into one list

In [24]:
# Test 4.1: with axis=1 every row is passed to the lambda, which collapses
# all of that row's column values into one Python list.
data.apply(lambda x: x.tolist(), axis=1)

0       [424339, DEPT OF HEALTH/MENTAL HYGIENE, Extern...
1       [379094, NYC EMPLOYEES RETIREMENT SYS, Externa...
2       [520417, NYC HOUSING AUTHORITY, External, Exec...
3       [233549, NYC EMPLOYEES RETIREMENT SYS, Externa...
4       [510256, HUMAN RIGHTS COMMISSION, External, As...
                              ...                        
3768    [457839, NYC EMPLOYEES RETIREMENT SYS, Interna...
3769    [527141, HOUSING PRESERVATION & DVLPMNT, Inter...
3770    [509443, FINANCIAL INFO SVCS AGENCY, Internal,...
3771    [352367, DEPT OF HEALTH/MENTAL HYGIENE, Intern...
3772    [478739, HUMAN RIGHTS COMMISSION, External, Ag...
Length: 3773, dtype: object

In [25]:
# Test 4.1 (continued): check the type of the result — it is a Series object.
type(data.apply(lambda x: x.tolist(), axis=1))

pandas.core.series.Series

In [26]:
# Test 4.2: the result is a Series, but each of its elements is a Python list.
type(data.apply(lambda x: x.tolist(), axis=1)[0])

list

In [27]:
# 4.3 Wrapping the Series in a DataFrame gives a single column (labelled 0)
# whose cells are the row lists.
pd.DataFrame(data.apply(lambda x: x.tolist(), axis=1))

Unnamed: 0,0
0,"[424339, DEPT OF HEALTH/MENTAL HYGIENE, Extern..."
1,"[379094, NYC EMPLOYEES RETIREMENT SYS, Externa..."
2,"[520417, NYC HOUSING AUTHORITY, External, Exec..."
3,"[233549, NYC EMPLOYEES RETIREMENT SYS, Externa..."
4,"[510256, HUMAN RIGHTS COMMISSION, External, As..."
...,...
3768,"[457839, NYC EMPLOYEES RETIREMENT SYS, Interna..."
3769,"[527141, HOUSING PRESERVATION & DVLPMNT, Inter..."
3770,"[509443, FINANCIAL INFO SVCS AGENCY, Internal,..."
3771,"[352367, DEPT OF HEALTH/MENTAL HYGIENE, Intern..."


#### 4.4 For example, to split these lists back into columns, use the `result_type` parameter of `apply`

In [28]:
# 4.4 result_type='expand' spreads each returned list across separate
# columns; the columns are numbered instead of keeping the original names.
data.apply(lambda x: x.tolist(), axis=1, result_type='expand')

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,424339,DEPT OF HEALTH/MENTAL HYGIENE,External,Public health nurse,Health,84252.0,84252.0,04-12-22,84252.0
1,379094,NYC EMPLOYEES RETIREMENT SYS,External,Cert it developer (app),"Technology, Data & Innovation",82884.0,116391.0,04-12-22,99637.5
2,520417,NYC HOUSING AUTHORITY,External,Executive agency counsel,Legal Affairs,105000.0,125000.0,04-12-22,115000.0
3,233549,NYC EMPLOYEES RETIREMENT SYS,External,Certified it administrator (la,Information Technology & Telecommunications,87203.0,131623.0,04-12-22,109413.0
4,510256,HUMAN RIGHTS COMMISSION,External,Associate human rights special,Constituent Services & Community Programs,58449.0,67216.0,04-12-22,62832.5
...,...,...,...,...,...,...,...,...,...
3768,457839,NYC EMPLOYEES RETIREMENT SYS,Internal,Certified it administrator (la,"Technology, Data & Innovation",85371.0,119883.0,04-12-22,102627.0
3769,527141,HOUSING PRESERVATION & DVLPMNT,Internal,Associate housing development,"Engineering, Architecture, & Planning",74650.0,85847.0,04-12-22,80248.5
3770,509443,FINANCIAL INFO SVCS AGENCY,Internal,Senior it architect,"Technology, Data & Innovation",100000.0,115000.0,04-12-22,107500.0
3771,352367,DEPT OF HEALTH/MENTAL HYGIENE,Internal,Economist,"Finance, Accounting, & Procurement",42288.0,59400.0,04-12-22,50844.0


In [29]:
# 4.5 result_type='broadcast' fits the results back into the original shape,
# so the original column names are kept.
data.apply(lambda x: x.tolist(), axis=1, result_type='broadcast')

Unnamed: 0,Job ID,Agency,Posting Type,Civil Service Title,Job Category,Salary Range From,Salary Range To,Process Date,Salary Avg.
0,424339,DEPT OF HEALTH/MENTAL HYGIENE,External,Public health nurse,Health,84252.0,84252.0,04-12-22,84252.0
1,379094,NYC EMPLOYEES RETIREMENT SYS,External,Cert it developer (app),"Technology, Data & Innovation",82884.0,116391.0,04-12-22,99637.5
2,520417,NYC HOUSING AUTHORITY,External,Executive agency counsel,Legal Affairs,105000.0,125000.0,04-12-22,115000.0
3,233549,NYC EMPLOYEES RETIREMENT SYS,External,Certified it administrator (la,Information Technology & Telecommunications,87203.0,131623.0,04-12-22,109413.0
4,510256,HUMAN RIGHTS COMMISSION,External,Associate human rights special,Constituent Services & Community Programs,58449.0,67216.0,04-12-22,62832.5
...,...,...,...,...,...,...,...,...,...
3768,457839,NYC EMPLOYEES RETIREMENT SYS,Internal,Certified it administrator (la,"Technology, Data & Innovation",85371.0,119883.0,04-12-22,102627.0
3769,527141,HOUSING PRESERVATION & DVLPMNT,Internal,Associate housing development,"Engineering, Architecture, & Planning",74650.0,85847.0,04-12-22,80248.5
3770,509443,FINANCIAL INFO SVCS AGENCY,Internal,Senior it architect,"Technology, Data & Innovation",100000.0,115000.0,04-12-22,107500.0
3771,352367,DEPT OF HEALTH/MENTAL HYGIENE,Internal,Economist,"Finance, Accounting, & Procurement",42288.0,59400.0,04-12-22,50844.0


# 5.0 Pandas Functions: Apply vs. Map vs. Applymap

1. `apply` applies a function to each element of a Series, or to each row or column of a DataFrame.
2. `map` is basically used less often, because `apply` can do the same job.
3. `applymap` is applied element-wise to the whole DataFrame.

#### Apply function

In [30]:
def capitalize(title):
    """Lower-case ``title``, then upper-case its first character."""
    lowered = title.lower()
    capitalized = lowered.capitalize()
    return capitalized

# apply on a Series calls the function once for every element of the column.
data['Agency'].apply(capitalize)

0        Dept of health/mental hygiene
1         Nyc employees retirement sys
2                Nyc housing authority
3         Nyc employees retirement sys
4              Human rights commission
                     ...              
3768      Nyc employees retirement sys
3769    Housing preservation & dvlpmnt
3770        Financial info svcs agency
3771     Dept of health/mental hygiene
3772           Human rights commission
Name: Agency, Length: 3773, dtype: object

#### Applymap function

In [31]:
# Applymap on whole dataset: helper that is applied to every single cell.
def add_year(text, year=2023):
    """Return ``text`` converted to a string with ``'_<year>'`` appended.

    Parameters
    ----------
    text : object
        Any cell value; it is converted with ``str`` first, so numbers and
        strings are both accepted.
    year : int or str, optional
        Year to append. Defaults to 2023, which keeps the behaviour of
        calling ``add_year(text)`` unchanged.

    Returns
    -------
    str
        For example ``add_year(424339)`` gives ``'424339_2023'``.
    """
    return str(text) + '_' + str(year)

# DataFrame.applymap is deprecated since pandas 2.1 in favour of the
# equivalent DataFrame.map. Use the new name when this pandas version has it,
# and fall back to applymap on older versions.
(data.map if hasattr(data, 'map') else data.applymap)(add_year)

Unnamed: 0,Job ID,Agency,Posting Type,Civil Service Title,Job Category,Salary Range From,Salary Range To,Process Date,Salary Avg.
0,424339_2023,DEPT OF HEALTH/MENTAL HYGIENE_2023,External_2023,Public health nurse_2023,Health_2023,84252.0_2023,84252.0_2023,04-12-22_2023,84252.0_2023
1,379094_2023,NYC EMPLOYEES RETIREMENT SYS_2023,External_2023,Cert it developer (app)_2023,"Technology, Data & Innovation_2023",82884.0_2023,116391.0_2023,04-12-22_2023,99637.5_2023
2,520417_2023,NYC HOUSING AUTHORITY_2023,External_2023,Executive agency counsel_2023,Legal Affairs_2023,105000.0_2023,125000.0_2023,04-12-22_2023,115000.0_2023
3,233549_2023,NYC EMPLOYEES RETIREMENT SYS_2023,External_2023,Certified it administrator (la_2023,Information Technology & Telecommunications_2023,87203.0_2023,131623.0_2023,04-12-22_2023,109413.0_2023
4,510256_2023,HUMAN RIGHTS COMMISSION_2023,External_2023,Associate human rights special_2023,Constituent Services & Community Programs_2023,58449.0_2023,67216.0_2023,04-12-22_2023,62832.5_2023
...,...,...,...,...,...,...,...,...,...
3768,457839_2023,NYC EMPLOYEES RETIREMENT SYS_2023,Internal_2023,Certified it administrator (la_2023,"Technology, Data & Innovation_2023",85371.0_2023,119883.0_2023,04-12-22_2023,102627.0_2023
3769,527141_2023,HOUSING PRESERVATION & DVLPMNT_2023,Internal_2023,Associate housing development_2023,"Engineering, Architecture, & Planning_2023",74650.0_2023,85847.0_2023,04-12-22_2023,80248.5_2023
3770,509443_2023,FINANCIAL INFO SVCS AGENCY_2023,Internal_2023,Senior it architect_2023,"Technology, Data & Innovation_2023",100000.0_2023,115000.0_2023,04-12-22_2023,107500.0_2023
3771,352367_2023,DEPT OF HEALTH/MENTAL HYGIENE_2023,Internal_2023,Economist_2023,"Finance, Accounting, & Procurement_2023",42288.0_2023,59400.0_2023,04-12-22_2023,50844.0_2023


# 6.0 Map on a Series: Passing a Dictionary or a Function to the Map Function

In [32]:
# Create a Series object with one genuinely missing entry. np.nan is used
# instead of the string 'NaN' so that pandas treats the value as missing
# (e.g. for isna()); it is still displayed as NaN.
s = pd.Series(['Cat', 'Dog', np.nan, 'Rabbit'])
s

0       Cat
1       Dog
2       NaN
3    Rabbit
dtype: object

In [33]:
# Replace values with map and a dict: entries found among the keys are
# replaced; entries without a matching key come back as NaN, so be careful
# when the dictionary does not cover every value.
s.map({'Cat':'Kitten','Dog':'Puppy'})

0    Kitten
1     Puppy
2       NaN
3       NaN
dtype: object

In [34]:
## Map with a function instead of a dict
def change_word(title):
    """Return ``title`` with every 'DEPT' substring swapped for 'Division'."""
    old, new = 'DEPT', 'Division'
    return title.replace(old, new)

# map calls change_word on each agency name; names without 'DEPT' come back
# unchanged. The column in `data` is not modified.
data['Agency'].map(change_word)

0       Division OF HEALTH/MENTAL HYGIENE
1            NYC EMPLOYEES RETIREMENT SYS
2                   NYC HOUSING AUTHORITY
3            NYC EMPLOYEES RETIREMENT SYS
4                 HUMAN RIGHTS COMMISSION
                      ...                
3768         NYC EMPLOYEES RETIREMENT SYS
3769       HOUSING PRESERVATION & DVLPMNT
3770           FINANCIAL INFO SVCS AGENCY
3771    Division OF HEALTH/MENTAL HYGIENE
3772              HUMAN RIGHTS COMMISSION
Name: Agency, Length: 3773, dtype: object

1. `applymap` affects every element of the whole DataFrame.
2. `map` works on a single column; let's take an example.

In [37]:
# Preview the current frame before the single-column map example.
data.head()

Unnamed: 0,Job ID,Agency,Posting Type,Civil Service Title,Job Category,Salary Range From,Salary Range To,Process Date,Salary Avg.
0,424339,DEPT OF HEALTH/MENTAL HYGIENE,External,Public health nurse,Health,84252.0,84252.0,04-12-22,84252.0
1,379094,NYC EMPLOYEES RETIREMENT SYS,External,Cert it developer (app),"Technology, Data & Innovation",82884.0,116391.0,04-12-22,99637.5
2,520417,NYC HOUSING AUTHORITY,External,Executive agency counsel,Legal Affairs,105000.0,125000.0,04-12-22,115000.0
3,233549,NYC EMPLOYEES RETIREMENT SYS,External,Certified it administrator (la,Information Technology & Telecommunications,87203.0,131623.0,04-12-22,109413.0
4,510256,HUMAN RIGHTS COMMISSION,External,Associate human rights special,Constituent Services & Community Programs,58449.0,67216.0,04-12-22,62832.5


In [38]:
# Map on a single column to build a sentence per row. The bound method
# '...'.format is passed uncalled (.format sits outside the quotes and has
# no parentheses), so map calls it with each agency name to fill the {}.
data['Agency'].map('This Position is offered by {}'.format)





0       This Position is offered by DEPT OF HEALTH/MEN...
1       This Position is offered by NYC EMPLOYEES RETI...
2       This Position is offered by NYC HOUSING AUTHORITY
3       This Position is offered by NYC EMPLOYEES RETI...
4       This Position is offered by HUMAN RIGHTS COMMI...
                              ...                        
3768    This Position is offered by NYC EMPLOYEES RETI...
3769    This Position is offered by HOUSING PRESERVATI...
3770    This Position is offered by FINANCIAL INFO SVC...
3771    This Position is offered by DEPT OF HEALTH/MEN...
3772    This Position is offered by HUMAN RIGHTS COMMI...
Name: Agency, Length: 3773, dtype: object

## Map Function

In [39]:
# Odd/even check: 5 % 2 leaves remainder 1, so comparing with 1 gives True (5 is odd).
5%2 ==1

True

In [40]:
# Comparing the same remainder with 0 tests for evenness: 5 is not even, so this is False.
5%2 ==0

False

### Ref

1. https://www.youtube.com/watch?v=DsjvCKxOdgI
2. https://www.youtube.com/watch?v=Emd4oXErshw