### MAPPING VALUES TO COLUMNS

+ The `.map()` method maps each existing value in a column (or, element-wise, in an entire DataFrame) to a new value.
+ You can pass a dictionary with the existing values as keys
+ and the new values as values; existing values that are not keys in the dictionary become `NaN`.
+ You can apply lambda functions as well.

Column Creation with `.assign` method.

+ The `.assign()` method creates one or more columns and returns a new DataFrame.
+ This can be chained with other data processing steps.

In [1]:
import pandas as pd
import numpy as np

In [9]:
# Load the product catalogue.
product_df = pd.read_csv("product.csv")

# Lookup table: category label -> vegan / non-vegan tag.
# `.map()` matches keys exactly, so every label must be spelled as in the data.
# NOTE(review): "Diary" is presumably meant to be "Dairy" -- confirm.
mapping_dict = dict(
    [
        ("Diary", "Non-Vegan"),
        ("Vegetables", "Vegan"),
        ("Fruits", "Vegan"),
    ]
)

# Add the tag column in place on `product_df`.
# NOTE(review): PRODUCT_ID holds numeric ids while the dict is keyed by
# category names, so no key matches; the displayed rows show "Vegan??" empty.
# The lookup was presumably meant to run on a categorical column -- confirm
# which one before relying on this column.
product_df["Vegan??"] = product_df.loc[:, "PRODUCT_ID"].map(mapping_dict)
product_df

Unnamed: 0,PRODUCT_ID,MANUFACTURER,DEPARTMENT,BRAND,COMMODITY_DESC,SUB_COMMODITY_DESC,CURR_SIZE_OF_PRODUCT,Vegan??
0,25671,2,GROCERY,National,FRZN ICE,ICE - CRUSHED/CUBED,22 LB,
1,26081,2,MISC. TRANS.,National,NO COMMODITY DESCRIPTION,NO SUBCOMMODITY DESCRIPTION,,
2,26093,69,PASTRY,Private,BREAD,BREAD:ITALIAN/FRENCH,,
3,26190,69,GROCERY,Private,FRUIT - SHELF STABLE,APPLE SAUCE,50 OZ,
4,26355,69,GROCERY,Private,COOKIES/CONES,SPECIALTY COOKIES,14 OZ,
...,...,...,...,...,...,...,...,...
92348,18293142,6384,DRUG GM,National,BOOKSTORE,PAPERBACK BOOKS,,
92349,18293439,6393,DRUG GM,National,BOOKSTORE,CHILDRENS LOW END,,
92350,18293696,6406,DRUG GM,National,BOOKSTORE,PAPERBACK BEST SELLER,,
92351,18294080,6442,DRUG GM,National,BOOKSTORE,PAPERBACK BOOKS,,


In [16]:
# Load the retail sales extract used by the remaining cells.
retail = pd.read_csv("retail_2016_2017.csv")

# Number of rows recorded for each product family.
retail["family"].value_counts()

HARDWARE                      31968
HOME AND KITCHEN II           31968
FROZEN FOODS                  31968
CLEANING                      31968
SCHOOL AND OFFICE SUPPLIES    31968
HOME APPLIANCES               31968
PLAYERS AND ELECTRONICS       31968
AUTOMOTIVE                    31968
PREPARED FOODS                31968
POULTRY                       31968
GROCERY I                     31968
BEVERAGES                     31968
PERSONAL CARE                 31968
BREAD/BAKERY                  31968
LADIESWEAR                    31968
LAWN AND GARDEN               31968
GROCERY II                    31968
PRODUCE                       31968
BABY CARE                     31968
HOME CARE                     31968
LIQUOR,WINE,BEER              31968
LINGERIE                      31968
DAIRY                         31968
DELI                          31968
PET SUPPLIES                  31968
SEAFOOD                       31968
MAGAZINES                     31968
BOOKS                       

In [17]:
# Product families that roll up into the single "Grocery" category.
# Families not listed here have no entry, so `.map()` leaves them as NaN.
product_category_dict = dict.fromkeys(
    ("PRODUCE", "POULTRY", "GROCERY I", "GROCERY II", "EGGS"),
    "Grocery",
)

In [18]:
# Translate each family to its category; families absent from the dict map to NaN.
retail["family"].map(product_category_dict)

0              NaN
1              NaN
2              NaN
3              NaN
4              NaN
            ...   
1054939    Grocery
1054940        NaN
1054941    Grocery
1054942        NaN
1054943        NaN
Name: family, Length: 1054944, dtype: object

In [19]:
# Count rows per mapped category; `value_counts()` leaves NaN out by default.
retail["family"].map(product_category_dict).value_counts()

Grocery    159840
Name: family, dtype: int64

In [20]:
# Same count, but keep NaN so the unmapped families are visible too.
retail["family"].map(product_category_dict).value_counts(dropna=False)

NaN        895104
Grocery    159840
Name: family, dtype: int64

In [38]:
# Parse the date column once instead of slicing the raw text three times.
# The explicit `format` makes any value that is not YYYY-MM-DD raise, rather
# than being silently mis-sliced into wrong year/month/day parts.
retail_dates = pd.to_datetime(retail["date"], format="%Y-%m-%d")

# Derive the extra columns on a new frame (`retail` itself is not modified),
# then keep only the rows dated before 2020.
# Callables receive the frame being assigned to, so this step keeps working
# when it is chained after other processing steps.
retail.assign(
    tax_amount=lambda df: (df["sales"] * 0.18).round(2),  # 18% of sales, 2 d.p.
    on_promotion_flag=lambda df: df["onpromotion"] > 0,
    year=retail_dates.dt.year,
    month=retail_dates.dt.month,
    day=retail_dates.dt.day,
).query("year < 2020")

Unnamed: 0,id,date,store_nbr,family,sales,onpromotion,tax_amount,on_promotion_flag,year,month,day
0,1945944,2016-01-01,1,AUTOMOTIVE,0.000,0,0.00,False,2016,1,1
1,1945945,2016-01-01,1,BABY CARE,0.000,0,0.00,False,2016,1,1
2,1945946,2016-01-01,1,BEAUTY,0.000,0,0.00,False,2016,1,1
3,1945947,2016-01-01,1,BEVERAGES,0.000,0,0.00,False,2016,1,1
4,1945948,2016-01-01,1,BOOKS,0.000,0,0.00,False,2016,1,1
...,...,...,...,...,...,...,...,...,...,...,...
1054939,3000883,2017-08-15,9,POULTRY,438.133,0,78.86,False,2017,8,15
1054940,3000884,2017-08-15,9,PREPARED FOODS,154.553,1,27.82,True,2017,8,15
1054941,3000885,2017-08-15,9,PRODUCE,2419.729,148,435.55,True,2017,8,15
1054942,3000886,2017-08-15,9,SCHOOL AND OFFICE SUPPLIES,121.000,8,21.78,True,2017,8,15


In [65]:
# Draw a reproducible five-row sample (fixed seed) to demonstrate `.assign()`.
sample_df = retail.sample(n=5, random_state=82)

# Build the derived columns on a new frame; `sample_df` itself is unchanged.
# The year / month / day parts are kept as text slices of the date string.
sample_df.assign(
    on_promotion_flag=lambda df: df["onpromotion"] > 0,
    family_abbreviation=lambda df: df["family"].str.slice(stop=3),
    gst_amount=lambda df: df["sales"] * 0.18,
    year=lambda df: df["date"].str.slice(stop=4),
    month=lambda df: df["date"].str.slice(start=5, stop=7),
    day=lambda df: df["date"].str.slice(start=8),
)

Unnamed: 0,id,date,store_nbr,family,sales,onpromotion,on_promotion_flag,family_abbreviation,gst_amount,year,month,day
889389,2835333,2017-05-15,14,CELEBRATION,15.0,0,False,CEL,2.7,2017,5,15
157108,2103052,2016-03-29,17,POULTRY,317.235,0,False,POU,57.1023,2016,3,29
153391,2099335,2016-03-27,13,CLEANING,536.0,0,False,CLE,96.48,2016,3,27
403863,2349807,2016-08-14,40,DELI,262.0,5,True,DEL,47.16,2016,8,14
148520,2094464,2016-03-24,26,LAWN AND GARDEN,0.0,0,False,LAW,0.0,2016,3,24
