## Mapping New Values

In [1]:
import pandas as pd
# NOTE(review): read_pickle unpickles the file, and unpickling can execute arbitrary code —
# only load pickle files that come from a trusted source.
air_quality = pd.read_pickle('air_quality.pkl')

In [2]:
# Preview the 'day_of_week_name' column before mapping new values onto it.
air_quality['day_of_week_name']

0         Friday
1         Friday
2         Friday
3         Friday
4         Friday
          ...   
95680    Tuesday
95681    Tuesday
95682    Tuesday
95683    Tuesday
95684    Tuesday
Name: day_of_week_name, Length: 95685, dtype: object

In [3]:
# We can see that this column stores the days of the week.
# What if we wanted to compare weekdays vs. weekend days?
# Start by counting how many rows fall on each day name.
air_quality['day_of_week_name'].value_counts()

day_of_week_name
Sunday       13931
Saturday     13812
Friday       13782
Thursday     13633
Monday       13536
Tuesday      13523
Wednesday    13468
Name: count, dtype: int64

### .map() method

In [4]:
# With .map() we can relabel each day of the week as either 'Weekday' or 'Weekend'.
# First create a dictionary that assigns every day name to its new label,
# then pass that dictionary to .map() through the `arg` argument.
# The output lists only 'Weekday': looking back at air_quality['day_of_week_name'],
# the first and last five rows were Friday and Tuesday, which are both weekdays.
day_of_week_mapping = {
    **dict.fromkeys(['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday'], 'Weekday'),
    **dict.fromkeys(['Saturday', 'Sunday'], 'Weekend'),
}
# Pass the dictionary through the `arg` keyword; the result is displayed but not stored anywhere.
air_quality['day_of_week_name'].map(arg = day_of_week_mapping)

0        Weekday
1        Weekday
2        Weekday
3        Weekday
4        Weekday
          ...   
95680    Weekday
95681    Weekday
95682    Weekday
95683    Weekday
95684    Weekday
Name: day_of_week_name, Length: 95685, dtype: object

In [5]:
# Double-check with value_counts(): only two distinct values ('Weekday' and 'Weekend') should be listed.
air_quality['day_of_week_name'].map(arg = day_of_week_mapping).value_counts()

day_of_week_name
Weekday    67942
Weekend    27743
Name: count, dtype: int64

In [6]:
# The .map() method does not save its result to the column.
# To keep the mapped values you would need to assign the result back to air_quality['day_of_week_name'].
# The counts below still show the original seven day names.
air_quality['day_of_week_name'].value_counts()

day_of_week_name
Sunday       13931
Saturday     13812
Friday       13782
Thursday     13633
Monday       13536
Tuesday      13523
Wednesday    13468
Name: count, dtype: int64

## .map() can also apply built-in functions, custom functions, and lambda functions

#### Built-in function

In [7]:
# len returns the length of a string, so each day name is replaced by its character count
# (e.g. 'Friday' -> 6, 'Tuesday' -> 7).
air_quality['day_of_week_name'].map(len)

0        6
1        6
2        6
3        6
4        6
        ..
95680    7
95681    7
95682    7
95683    7
95684    7
Name: day_of_week_name, Length: 95685, dtype: int64

In [8]:
# The built-in int function converts each value to an integer; for floats the
# fractional part is truncated rather than rounded.
air_quality['time_until_2022_days'].map(int)

0        3228
1        3227
2        3227
3        3227
4        3227
         ... 
95680    1767
95681    1767
95682    1767
95683    1767
95684    1767
Name: time_until_2022_days, Length: 95685, dtype: int64

#### Lambda function

In [9]:
# A lambda is a small anonymous function written inline. Here it labels each day
# as 'Weekend' or 'Weekday', giving the same result as the dictionary mapping above.
air_quality['day_of_week_name'].map(
    lambda day: 'Weekday' if day not in ['Saturday', 'Sunday'] else 'Weekend'
)

0        Weekday
1        Weekday
2        Weekday
3        Weekday
4        Weekday
          ...   
95680    Weekday
95681    Weekday
95682    Weekday
95683    Weekday
95684    Weekday
Name: day_of_week_name, Length: 95685, dtype: object

#### Custom Defined Function

In [11]:
def weekday_weekend(day):
    """Classify a day name as 'Weekend' or 'Weekday'.

    Returns 'Weekend' when ``day`` is 'Saturday' or 'Sunday'; every other
    value (including differently-cased names) is labelled 'Weekday'.
    """
    weekend_days = ('Saturday', 'Sunday')
    return 'Weekend' if day in weekend_days else 'Weekday'

# Pass the named function to .map(); it is called once for each value in the column.
air_quality['day_of_week_name'].map(weekday_weekend)

0        Weekday
1        Weekday
2        Weekday
3        Weekday
4        Weekday
          ...   
95680    Weekday
95681    Weekday
95682    Weekday
95683    Weekday
95684    Weekday
Name: day_of_week_name, Length: 95685, dtype: object

#### Another custom function with .map()

In [12]:
def cardinal_dir(dir):
    """Reduce a direction code to a cardinal direction using its first character.

    'N...' -> 'North', 'E...' -> 'East', 'S...' -> 'South'. Any other first
    character falls through to 'West'. Indexing ``dir[0]`` means an empty
    string raises IndexError.

    Note: the parameter name shadows the built-in ``dir``; it is kept so
    existing keyword callers continue to work.
    """
    for initial, direction in (('N', 'North'), ('E', 'East'), ('S', 'South')):
        if dir[0] == initial:
            return direction
    # Catch-all branch: everything not starting with N, E or S is treated as West.
    return 'West'

# Run every value of the 'wd' column through cardinal_dir.
air_quality['wd'].map(cardinal_dir)

0        North
1        North
2        North
3        North
4        North
         ...  
95680    North
95681    North
95682    North
95683    North
95684    North
Name: wd, Length: 95685, dtype: object

In [13]:
# Count how many rows were assigned to each cardinal direction.
air_quality['wd'].map(cardinal_dir).value_counts()

wd
North    34068
South    26054
East     22699
West     12864
Name: count, dtype: int64