# Climate-Friendly Food Systems (CFFS) Labelling Project

### The University of British Columbia

#### Created by Silvia Huang, CFFS Data Analyst
***

## Part III: Update Information and Mapping

# Climate-Friendly Food Systems (CFFS) Labelling Project

### The University of British Columbia

#### Created by Silvia Huang, CFFS Data Analyst
***

## Part III: Update Information and Mapping

## Set up and Import Libraries

In [1]:
#pip install -r requirements.txt

In [2]:
import numpy as np
import pandas as pd
import pdpipe as pdp
import matplotlib.pyplot as plt
import glob
import os
import csv
from itertools import islice
from decimal import Decimal
import xml.etree.ElementTree as et
from xml.etree.ElementTree import parse
import openpyxl
import pytest
from datetime import datetime

  from tqdm.autonotebook import tqdm


In [3]:
# Use the current working directory as the project root.
# NOTE(review): os.chdir(os.getcwd()) leaves the working directory unchanged, so the
# notebook must already be started from the project folder for the relative
# "data/..." paths used in later cells to resolve.
path = os.getcwd()
os.chdir(path)

In [4]:
# Lift pandas' display limits so every row and every column of a DataFrame is
# rendered (the notebook front end then shows the table in a scrolling window).
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

***
## Import Preprocessed Datasets

In [5]:
# Load the unit-cleaned prep list from data/cleaning; it carries the standardized
# quantity (StdQty) and unit (StdUom) columns that are updated further below.
Preps = pd.read_csv(os.path.join(os.getcwd(), "data", "cleaning", "Preps_Unit_Cleaned.csv"))
Preps.head()

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup,StdQty,StdUom
0,P-27317,BATCH|3 Bean Vegetable Chili,21.0,Kg,PREP,21000.0,g
1,P-56699,BATCH|Cauliflower Wings|Baked,1.0,Kg,,1000.0,g
2,P-64163,Batch|Chicken Burger|Shoyu,7.65,Kg,PREP,7650.0,g
3,P-26333,BATCH|Citrus Herb Aioli,3.0,L,PREP,3000.0,ml
4,P-53598,BATCH|Coleslaw mix,1.5,Kg,,1500.0,g


In [6]:
# Load the GHG emission factors (kg CO2 / kg food) per food category, identified by 'Category ID'.
ghge_factors = pd.read_csv(os.path.join(os.getcwd(), "data", "external", "ghge_factors.csv"))
ghge_factors.head()

Unnamed: 0,Category ID,Food Category,Active Total Supply Chain Emissions (kg CO2 / kg food)
0,1,beef & buffalo meat,41.3463
1,2,lamb/mutton & goat meat,41.6211
2,3,pork (pig meat),9.8315
3,4,"poultry (chicken, turkey)",4.3996
4,5,butter,11.4316


In [7]:
# Load the nitrogen footprint factors (g N lost / kg product) per food category, identified by 'Category ID'.
nitro_factors = pd.read_csv(os.path.join(os.getcwd(), "data", "external", "nitrogen_factors.csv"))
nitro_factors.head()

Unnamed: 0,Category ID,Food Category,g N lost/kg product
0,1,beef & buffalo meat,329.5
1,2,lamb/mutton & goat meat,231.15
2,3,pork (pig meat),132.8
3,4,"poultry (chicken, turkey)",116.8
4,5,butter,100.35


In [8]:
# Load the water footprint factors per food category: freshwater withdrawals and
# stress-weighted water use (both in L/FU), identified by 'Category ID'.
water_factors = pd.read_csv(os.path.join(os.getcwd(), "data", "external", "water_factors.csv"))
water_factors.head()

Unnamed: 0,Category ID,Food Category,Freshwater Withdrawals (L/FU),Stress-Weighted Water Use (L/FU)
0,1,beef & buffalo meat,1677.2,61309.0
1,2,lamb/mutton & goat meat,461.2,258.9
2,3,pork (pig meat),1810.3,54242.7
3,4,"poultry (chicken, turkey)",370.3,333.5
4,5,butter,1010.176,50055.168


In [9]:
# Load the current items list, in which each item already has an emission-factor CategoryID assigned.
# NOTE: this same file is overwritten later in the notebook with the updated list.
Items_Assigned = pd.read_csv(os.path.join(os.getcwd(), "data", "mapping", "Items_List_Assigned.csv"))
Items_Assigned.head()

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup
0,I-57545,1.0,CHUCK FLAT BONELESS FZN,3.3,Kg,1.0,Kg,MEAT
1,I-10869,1.0,BEEF STIRFRY COV FR,5.0,Kg,1.0,Kg,MEAT
2,I-7064,1.0,BEEF OUTSIDE FLAT AAA,1.0,Kg,1.0,Kg,MEAT
3,I-37005,1.0,BEEF MEATBALLS,4.54,Kg,1000.0,g,MEAT
4,I-37002,1.0,BEEF INSIDE ROUND SHAVED,9.0,Kg,1000.0,g,MEAT


In [10]:
# Load New_Items_Added_11.csv for inspection.
# NOTE(review): `new_items` is not referenced by any later cell visible in this notebook; the
# frame that is concatenated into the assigned items list is `New_Items_Added`, which is read
# from New_Items_Added_10.csv — confirm which batch is meant to be merged.
new_items = pd.read_csv("data/mapping/new items added/New_Items_Added_11.csv")
new_items

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup
0,I-68718,24,BUN HAMBURGER WW VEGAN 85GR,1.0,each,1,each,BREAD
1,I-68700,11,EGG CKD FOLDED VEGAN FRZ,60.0,each,1,each,DAIRY
2,I-54711,6,CHEESE HALLOUMI TRE STELLE,6.0,pak,160,g,DAIRY
3,I-4757,39,ONIONS RED,25.0,lb,1,lb,PRODUCE
4,I-36794,25,RICE BASMATI INDIAN,1.0,ea,40,lb,FOOD - GROCERY
5,I-3582,54,SEASONING MONTREAL STK,3.4,Kg,1,Kg,SPICES
6,I-65714,58,CHUTNEY MANGO-TAMARIND,2.0,un,2268,g,FOOD - GROCERY
7,I-19462,58,SAUCE SOY SWEET,4.0,JUG,4300,ml,FOOD - GROCERY
8,I-30167,41,"SWEET POTATO DICED 1/4""",5.0,lb,1,lb,PRODUCE
9,I-11549,35,CARROTS ORANGE UBC,1.0,lb,1,lb,PRODUCE


In [11]:
# Load the ingredient rows flagged as non-standard by the cleaning step (shown here for inspection).
# NOTE(review): `item_nonstd` is not used by any later cell visible in this notebook.
item_nonstd = pd.read_csv("data/cleaning/Items_Nonstd.csv")
item_nonstd

Unnamed: 0,IngredientId,Qty,Uom,Conversion,InvFactor,Recipe


In [12]:
# Load the preps recorded with a non-standard unit of measure (shown here for inspection).
# NOTE(review): `preps_nonstd` is not used by any later cell visible in this notebook.
preps_nonstd = pd.read_csv("data/cleaning/Preps_NonstdUom.csv")
preps_nonstd

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup,StdQty,StdUom


In [13]:
# Load the updated unit conversions (ConversionId, Multiplier, from/to quantity and unit).
# In the visible cells this frame is only inspected, not merged into anything.
conv_updatecov = pd.read_csv("data/cleaning/update/Conv_UpdateConv.csv")
conv_updatecov

Unnamed: 0,ConversionId,Multiplier,ConvertFromQty,ConvertFromUom,ConvertToQty,ConvertToUom
0,I-1028,0.008333,1.0,CT,120.0,g
1,I-1034,0.008333,1.0,CT,120.0,g
2,I-1035,0.01,1.0,CT,100.0,g
3,I-10605,0.00885,1.0,CT,113.0,g
4,I-1126,0.006667,1.0,CT,150.0,g
5,I-1127,0.006667,1.0,CT,150.0,g
6,I-1141,0.013333,1.0,CT,75.0,g
7,I-1143,0.013333,1.0,CT,75.0,g
8,I-11519,0.02,1.0,bag,50.0,g
9,I-1152,0.013333,1.0,CT,75.0,g


In [14]:
conv_updatecov.loc[conv_updatecov["ConversionId"] == "P-54581"]

Unnamed: 0,ConversionId,Multiplier,ConvertFromQty,ConvertFromUom,ConvertToQty,ConvertToUom


In [15]:
Preps.loc[Preps["PrepId"] == "P-54581"]

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup,StdQty,StdUom


***
## Import Update Info

In [16]:
# Import the list of preps whose unit of measure had to be converted to a standard unit manually
# (provides the corrected StdQty / StdUom per PrepId).
Manual_PrepU = pd.read_csv(os.path.join(os.getcwd(), "data", "cleaning", "update", "Preps_UpdateUom.csv"))
Manual_PrepU.head()

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup,StdQty,StdUom
0,P-54697,LEMON|Wedge 1/8,8.0,each,PREP,84.0,g
1,P-35132,MARINATED|Lemon & Herb Chx,185.0,ea,PREP,24050.0,g
2,P-51992,YIELD|Bread|Sourdough 5/8,36.0,slice,,1620.0,g
3,P-26234,BATCH|Roasted Garlic Bread,16.0,ea,PREP,1280.0,g
4,P-26170,GRILLED|NaanBread,1.0,ea,PREP,125.0,g


In [17]:
Manual_PrepU.loc[Manual_PrepU["PrepId"] == "P-54581"]

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup,StdQty,StdUom
212,P-54581,SLICE|Multigrain Bread,22.0,slice,,38.0,g


In [18]:
# Load the list of new items that already have a CategoryID assigned; this is the frame
# that gets appended to the assigned items list below.
# NOTE(review): the file name is hard-coded to New_Items_Added_10.csv, while an earlier cell
# loads New_Items_Added_11.csv into `new_items` — confirm which batch should be merged.
New_Items_Added = pd.read_csv(os.path.join(os.getcwd(), "data", "mapping", "new items added", "New_Items_Added_10.csv"))
New_Items_Added.tail(15)

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup
34,I-64877,3,TMRW SAUSAGE BREAKFAST PATTY,100,each,1.0,ea,MEAT
35,I-55331,4,CHICK BREAST BL/SO HAL TENDOUT,1,Kg,1.0,Kg,POULTRY
36,I-3999,4,CHICK DRUMSTICK HALAL,1,Kg,1.0,Kg,POULTRY
37,I-4465,36,ASPARAGUS (LARGE) MX,11,lb,1.0,lb,PRODUCE
38,I-22443,40,BAMBOO SHOOTS STRIP,6,LG CAN,2.84,L,PRODUCE
39,I-10616,17,BEANS ROMANO,1,lb,1.0,lb,PRODUCE
40,I-4582,38,CARROTS BABY BUNCHED BC,1,each,1.0,CT,PRODUCE
41,I-11670,40,COLESLAW MIX CABBAGE&CARROT,5,lb,1.0,lb,PRODUCE
42,I-10265,37,TOMATOES HEIRLOOM BC,1,lb,1.0,lb,PRODUCE
43,I-4849,36,SALAD MIX ARTISAN,3,bag,2.0,lb,PRODUCE


In [19]:
# Import the list of items whose footprint factors (GHG, nitrogen, freshwater and
# stress-weighted water) were adjusted manually.
Manual_Factor = pd.read_csv(os.path.join(os.getcwd(), "data", "mapping", "Manual_Adjust_Factors.csv"))
Manual_Factor.head()

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Active Total Supply Chain Emissions (kg CO2 / kg food),g N lost/kg product,Freshwater Withdrawals (L/FU),Stress-Weighted Water Use (L/FU)
0,I-52090,59,BURGER BEEF & MUSHROOM HALAL,1.0,cs,48.0,CT,MEAT,25.00894,200.86,1038.84,37961.2
1,I-45558,59,Prep-Vegan Parmesan,1000.0,g,1.0,g,PRODUCTION FOOD,3.85686,0.0,0.0,0.0
2,I-3352,59,MAYONNAISE PAIL TFC 4L,2.0,each,4.0,L,FOOD - GROCERY,3.55,0.0,0.0,0.0
3,I-3223,59,COCONUT MILK 17/19% MILK FAT,6.0,LG CAN,2.84,L,FOOD - GROCERY,3.5,0.0,1.0,1.0
4,I-2898,59,MUSTARD DIJON WINE FLEUR,6.0,jar,1.0,Kg,FOOD - GROCERY,3.326,0.0,0.0,0.0


### Apply Corrected UOM to Preps

In [20]:
# Update the prep list with the manually adjusted standard quantity / unit.
# Build a PrepId -> (StdQty, StdUom) lookup once and map it onto Preps, instead of
# re-scanning the whole of Preps for every manual row.
prep_overrides = (
    Manual_PrepU
    .dropna(subset=['PrepId'])                      # a missing id can never match a prep
    .drop_duplicates(subset='PrepId', keep='last')  # a later manual row wins, as in a sequential update
    .set_index('PrepId')
)
prep_has_override = Preps['PrepId'].isin(prep_overrides.index)
prep_override_ids = Preps.loc[prep_has_override, 'PrepId']
# Preps without a manual entry keep their current values; manual entries whose PrepId
# does not occur in Preps are ignored (no rows are added).
Preps.loc[prep_has_override, 'StdQty'] = prep_override_ids.map(prep_overrides['StdQty'])
Preps.loc[prep_has_override, 'StdUom'] = prep_override_ids.map(prep_overrides['StdUom'])

In [21]:
# Keep only the first row for each PrepId.
Preps = Preps.drop_duplicates(subset=['PrepId'], keep='first')

In [22]:
Preps.head()

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup,StdQty,StdUom
0,P-27317,BATCH|3 Bean Vegetable Chili,21.0,Kg,PREP,21000.0,g
1,P-56699,BATCH|Cauliflower Wings|Baked,1.0,Kg,,1000.0,g
2,P-64163,Batch|Chicken Burger|Shoyu,7.65,Kg,PREP,7650.0,g
3,P-26333,BATCH|Citrus Herb Aioli,3.0,L,PREP,3000.0,ml
4,P-53598,BATCH|Coleslaw mix,1.5,Kg,,1500.0,g


In [23]:
Preps.shape

(742, 7)

In [24]:
# Save the updated prep list to data/cleaning/Preps_List_Cleaned.csv (header written, row index omitted).
path = os.path.join(os.getcwd(), "data", "cleaning", "Preps_List_Cleaned.csv")
Preps.to_csv(path, index = False, header = True)

In [25]:
Items_Assigned.head()

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup
0,I-57545,1.0,CHUCK FLAT BONELESS FZN,3.3,Kg,1.0,Kg,MEAT
1,I-10869,1.0,BEEF STIRFRY COV FR,5.0,Kg,1.0,Kg,MEAT
2,I-7064,1.0,BEEF OUTSIDE FLAT AAA,1.0,Kg,1.0,Kg,MEAT
3,I-37005,1.0,BEEF MEATBALLS,4.54,Kg,1000.0,g,MEAT
4,I-37002,1.0,BEEF INSIDE ROUND SHAVED,9.0,Kg,1000.0,g,MEAT


In [26]:
New_Items_Added.head()

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup
0,I-13791,24,BAR GF NANAIMO 3X3 WRAPPED,12,CT,1.0,CT,BAKED GOODS
1,I-63679,24,BUTTERHORNS EACH,1,ea,1.0,ea,BAKED GOODS
2,I-31545,24,CAKE CARROT CARM STACK,48,slice,1.0,slice,BAKED GOODS
3,I-1057,24,CAKE CHOC TRIPLE TIGER 12X16IN,2,SHEET,1.0,SHEET,BAKED GOODS
4,I-60871,24,CAKE SHEET CHOC LAYER 12X16IN,2,SHEET,1.0,SHEET,BAKED GOODS


### Import List of New Items with Emission Factors Category ID Assigned

In [27]:
# Append the newly categorised items to the existing assignments with a fresh 0..n-1
# row index, then discard rows that are exact duplicates across every column.
frames = [Items_Assigned, New_Items_Added]
Items_Assigned_Updated = pd.concat(frames, ignore_index=True)
Items_Assigned_Updated = Items_Assigned_Updated.drop_duplicates()
Items_Assigned_Updated.head()

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup
0,I-57545,1.0,CHUCK FLAT BONELESS FZN,3.3,Kg,1.0,Kg,MEAT
1,I-10869,1.0,BEEF STIRFRY COV FR,5.0,Kg,1.0,Kg,MEAT
2,I-7064,1.0,BEEF OUTSIDE FLAT AAA,1.0,Kg,1.0,Kg,MEAT
3,I-37005,1.0,BEEF MEATBALLS,4.54,Kg,1000.0,g,MEAT
4,I-37002,1.0,BEEF INSIDE ROUND SHAVED,9.0,Kg,1000.0,g,MEAT


In [28]:
Items_Assigned_Updated

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup
0,I-57545,1.0,CHUCK FLAT BONELESS FZN,3.3,Kg,1.0,Kg,MEAT
1,I-10869,1.0,BEEF STIRFRY COV FR,5.0,Kg,1.0,Kg,MEAT
2,I-7064,1.0,BEEF OUTSIDE FLAT AAA,1.0,Kg,1.0,Kg,MEAT
3,I-37005,1.0,BEEF MEATBALLS,4.54,Kg,1000.0,g,MEAT
4,I-37002,1.0,BEEF INSIDE ROUND SHAVED,9.0,Kg,1000.0,g,MEAT
5,I-3876,1.0,BEEF CHUCK GROUND AAA,1.0,Kg,1.0,Kg,MEAT
6,I-34065,1.0,BEEF BONES KNUCKLE FZ,1.0,Kg,1.0,Kg,MEAT
7,I-11661,1.0,"BEEF STEW 3/4"" FROZEN",5.0,Kg,1.0,Kg,MEAT
8,I-3837,1.0,"BEEF STEW DICED 3/4""FR",1.0,Kg,1.0,Kg,MEAT
9,I-43648,1.0,BRISKET BEEF CORN SMKD FR,4.5,Kg,1.0,Kg,MEAT


In [29]:
Items_Assigned_Updated.shape

(2077, 8)

In [30]:
# Coerce CategoryID to a numeric dtype; it is the join key for the factor tables below.
Items_Assigned_Updated['CategoryID'] = pd.to_numeric(Items_Assigned_Updated['CategoryID'])

In [31]:
Items_Assigned_Updated

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup
0,I-57545,1.0,CHUCK FLAT BONELESS FZN,3.3,Kg,1.0,Kg,MEAT
1,I-10869,1.0,BEEF STIRFRY COV FR,5.0,Kg,1.0,Kg,MEAT
2,I-7064,1.0,BEEF OUTSIDE FLAT AAA,1.0,Kg,1.0,Kg,MEAT
3,I-37005,1.0,BEEF MEATBALLS,4.54,Kg,1000.0,g,MEAT
4,I-37002,1.0,BEEF INSIDE ROUND SHAVED,9.0,Kg,1000.0,g,MEAT
5,I-3876,1.0,BEEF CHUCK GROUND AAA,1.0,Kg,1.0,Kg,MEAT
6,I-34065,1.0,BEEF BONES KNUCKLE FZ,1.0,Kg,1.0,Kg,MEAT
7,I-11661,1.0,"BEEF STEW 3/4"" FROZEN",5.0,Kg,1.0,Kg,MEAT
8,I-3837,1.0,"BEEF STEW DICED 3/4""FR",1.0,Kg,1.0,Kg,MEAT
9,I-43648,1.0,BRISKET BEEF CORN SMKD FR,4.5,Kg,1.0,Kg,MEAT


In [32]:
# Write the merged assignments back to data/mapping/Items_List_Assigned.csv (row index omitted).
# NOTE: this is the same file that Items_Assigned was loaded from, so each run overwrites
# the input of the next run with the list that already includes New_Items_Added.
path = os.path.join(os.getcwd(), "data", "mapping", "Items_List_Assigned.csv")
Items_Assigned_Updated.to_csv(path, index = False, header = True)

## Mapping Items to Footprint Factors

In [33]:
# Map GHG footprint factors
ghge_col = 'Active Total Supply Chain Emissions (kg CO2 / kg food)'
# Left join keeps every item; only the key and the factor column are taken from the
# factor table, since nothing else from it is kept in the result.
mapping = pd.merge(Items_Assigned_Updated, ghge_factors.loc[:, ['Category ID', ghge_col]],
                   how = 'left',
                   left_on = 'CategoryID',
                   right_on = 'Category ID')
# Items whose CategoryID has no row in the factor table come back with a missing
# 'Category ID'; give those items a factor of 0 in one vectorised assignment.
mapping.loc[mapping['Category ID'].isna(), ghge_col] = 0

# The right-hand join key duplicates CategoryID and is no longer needed.
mapping = mapping.drop(columns=['Category ID'])
mapping

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Active Total Supply Chain Emissions (kg CO2 / kg food)
0,I-57545,1.0,CHUCK FLAT BONELESS FZN,3.3,Kg,1.0,Kg,MEAT,41.3463
1,I-10869,1.0,BEEF STIRFRY COV FR,5.0,Kg,1.0,Kg,MEAT,41.3463
2,I-7064,1.0,BEEF OUTSIDE FLAT AAA,1.0,Kg,1.0,Kg,MEAT,41.3463
3,I-37005,1.0,BEEF MEATBALLS,4.54,Kg,1000.0,g,MEAT,41.3463
4,I-37002,1.0,BEEF INSIDE ROUND SHAVED,9.0,Kg,1000.0,g,MEAT,41.3463
5,I-3876,1.0,BEEF CHUCK GROUND AAA,1.0,Kg,1.0,Kg,MEAT,41.3463
6,I-34065,1.0,BEEF BONES KNUCKLE FZ,1.0,Kg,1.0,Kg,MEAT,41.3463
7,I-11661,1.0,"BEEF STEW 3/4"" FROZEN",5.0,Kg,1.0,Kg,MEAT,41.3463
8,I-3837,1.0,"BEEF STEW DICED 3/4""FR",1.0,Kg,1.0,Kg,MEAT,41.3463
9,I-43648,1.0,BRISKET BEEF CORN SMKD FR,4.5,Kg,1.0,Kg,MEAT,41.3463


In [34]:
# Map nitrogen footprint factors
nitro_col = 'g N lost/kg product'
# Left join keeps every item; only the key and the factor column are taken from the
# factor table, since nothing else from it is kept in the result.
mapping = pd.merge(mapping, nitro_factors.loc[:, ['Category ID', nitro_col]],
                   how = 'left',
                   left_on = 'CategoryID',
                   right_on = 'Category ID')

# Items whose CategoryID has no row in the factor table come back with a missing
# 'Category ID'; give those items a factor of 0 in one vectorised assignment.
mapping.loc[mapping['Category ID'].isna(), nitro_col] = 0

# The right-hand join key duplicates CategoryID and is no longer needed.
mapping = mapping.drop(columns=['Category ID'])
mapping

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Active Total Supply Chain Emissions (kg CO2 / kg food),g N lost/kg product
0,I-57545,1.0,CHUCK FLAT BONELESS FZN,3.3,Kg,1.0,Kg,MEAT,41.3463,329.5
1,I-10869,1.0,BEEF STIRFRY COV FR,5.0,Kg,1.0,Kg,MEAT,41.3463,329.5
2,I-7064,1.0,BEEF OUTSIDE FLAT AAA,1.0,Kg,1.0,Kg,MEAT,41.3463,329.5
3,I-37005,1.0,BEEF MEATBALLS,4.54,Kg,1000.0,g,MEAT,41.3463,329.5
4,I-37002,1.0,BEEF INSIDE ROUND SHAVED,9.0,Kg,1000.0,g,MEAT,41.3463,329.5
5,I-3876,1.0,BEEF CHUCK GROUND AAA,1.0,Kg,1.0,Kg,MEAT,41.3463,329.5
6,I-34065,1.0,BEEF BONES KNUCKLE FZ,1.0,Kg,1.0,Kg,MEAT,41.3463,329.5
7,I-11661,1.0,"BEEF STEW 3/4"" FROZEN",5.0,Kg,1.0,Kg,MEAT,41.3463,329.5
8,I-3837,1.0,"BEEF STEW DICED 3/4""FR",1.0,Kg,1.0,Kg,MEAT,41.3463,329.5
9,I-43648,1.0,BRISKET BEEF CORN SMKD FR,4.5,Kg,1.0,Kg,MEAT,41.3463,329.5


In [35]:
# Map water footprint factors
water_cols = ['Freshwater Withdrawals (L/FU)', 'Stress-Weighted Water Use (L/FU)']
# Left join keeps every item; only the key and the two factor columns are taken from
# the factor table, since nothing else from it is kept in the result.
mapping = pd.merge(mapping, water_factors.loc[:, ['Category ID'] + water_cols],
                   how = 'left',
                   left_on = 'CategoryID',
                   right_on = 'Category ID')

# Items whose CategoryID has no row in the factor table come back with a missing
# 'Category ID'; give those items zero water factors in one vectorised assignment.
mapping.loc[mapping['Category ID'].isna(), water_cols] = 0

# Drop the redundant right-hand join key, then keep the first row per ItemId.
mapping = mapping.drop(columns=['Category ID'])
mapping = mapping.drop_duplicates(subset=["ItemId"])
mapping

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Active Total Supply Chain Emissions (kg CO2 / kg food),g N lost/kg product,Freshwater Withdrawals (L/FU),Stress-Weighted Water Use (L/FU)
0,I-57545,1.0,CHUCK FLAT BONELESS FZN,3.3,Kg,1.0,Kg,MEAT,41.3463,329.5,1677.2,61309.0
1,I-10869,1.0,BEEF STIRFRY COV FR,5.0,Kg,1.0,Kg,MEAT,41.3463,329.5,1677.2,61309.0
2,I-7064,1.0,BEEF OUTSIDE FLAT AAA,1.0,Kg,1.0,Kg,MEAT,41.3463,329.5,1677.2,61309.0
3,I-37005,1.0,BEEF MEATBALLS,4.54,Kg,1000.0,g,MEAT,41.3463,329.5,1677.2,61309.0
4,I-37002,1.0,BEEF INSIDE ROUND SHAVED,9.0,Kg,1000.0,g,MEAT,41.3463,329.5,1677.2,61309.0
5,I-3876,1.0,BEEF CHUCK GROUND AAA,1.0,Kg,1.0,Kg,MEAT,41.3463,329.5,1677.2,61309.0
6,I-34065,1.0,BEEF BONES KNUCKLE FZ,1.0,Kg,1.0,Kg,MEAT,41.3463,329.5,1677.2,61309.0
7,I-11661,1.0,"BEEF STEW 3/4"" FROZEN",5.0,Kg,1.0,Kg,MEAT,41.3463,329.5,1677.2,61309.0
8,I-3837,1.0,"BEEF STEW DICED 3/4""FR",1.0,Kg,1.0,Kg,MEAT,41.3463,329.5,1677.2,61309.0
9,I-43648,1.0,BRISKET BEEF CORN SMKD FR,4.5,Kg,1.0,Kg,MEAT,41.3463,329.5,1677.2,61309.0


In [36]:
mapping[mapping["ItemId"] == "I-4524"]

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Active Total Supply Chain Emissions (kg CO2 / kg food),g N lost/kg product,Freshwater Withdrawals (L/FU),Stress-Weighted Water Use (L/FU)
2006,I-4524,38.0,BEETS GOLDBC,5.0,lb,1.0,lb,PRODUCE,0.3062,7.9,9.9,37.9


In [37]:
mapping["CategoryID"].isnull().sum()

0

### Manually Adjust Footprint Factors for Specific Items

In [38]:
# Override the category-level footprint factors with the manually adjusted values.
# Build an ItemId -> factors lookup once and map it onto `mapping`, instead of
# re-scanning the whole of `mapping` four times for every manual row.
factor_cols = [
    'Active Total Supply Chain Emissions (kg CO2 / kg food)',
    'g N lost/kg product',
    'Freshwater Withdrawals (L/FU)',
    'Stress-Weighted Water Use (L/FU)',
]
manual_overrides = (
    Manual_Factor
    .dropna(subset=['ItemId'])                      # a missing id can never match an item
    .drop_duplicates(subset='ItemId', keep='last')  # a later manual row wins, as in a sequential update
    .set_index('ItemId')[factor_cols]
)
item_has_override = mapping['ItemId'].isin(manual_overrides.index)
item_override_ids = mapping.loc[item_has_override, 'ItemId']
# Items without a manual entry keep their mapped factors; manual entries whose ItemId
# does not occur in `mapping` are ignored (no rows are added).
for factor_col in factor_cols:
    mapping.loc[item_has_override, factor_col] = item_override_ids.map(manual_overrides[factor_col])

In [39]:
# Keep the first row per ItemId, then list the dtype of every column.
mapping = mapping.drop_duplicates(subset=['ItemId'], keep='first')
mapping.dtypes

ItemId                                                     object
CategoryID                                                float64
Description                                                object
CaseQty                                                   float64
CaseUOM                                                    object
PakQty                                                    float64
PakUOM                                                     object
InventoryGroup                                             object
Active Total Supply Chain Emissions (kg CO2 / kg food)    float64
g N lost/kg product                                       float64
Freshwater Withdrawals (L/FU)                             float64
Stress-Weighted Water Use (L/FU)                          float64
dtype: object

In [40]:
mapping.shape

(2077, 12)

In [41]:
mapping

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Active Total Supply Chain Emissions (kg CO2 / kg food),g N lost/kg product,Freshwater Withdrawals (L/FU),Stress-Weighted Water Use (L/FU)
0,I-57545,1.0,CHUCK FLAT BONELESS FZN,3.3,Kg,1.0,Kg,MEAT,41.3463,329.5,1677.2,61309.0
1,I-10869,1.0,BEEF STIRFRY COV FR,5.0,Kg,1.0,Kg,MEAT,41.3463,329.5,1677.2,61309.0
2,I-7064,1.0,BEEF OUTSIDE FLAT AAA,1.0,Kg,1.0,Kg,MEAT,41.3463,329.5,1677.2,61309.0
3,I-37005,1.0,BEEF MEATBALLS,4.54,Kg,1000.0,g,MEAT,41.3463,329.5,1677.2,61309.0
4,I-37002,1.0,BEEF INSIDE ROUND SHAVED,9.0,Kg,1000.0,g,MEAT,41.3463,329.5,1677.2,61309.0
5,I-3876,1.0,BEEF CHUCK GROUND AAA,1.0,Kg,1.0,Kg,MEAT,41.3463,329.5,1677.2,61309.0
6,I-34065,1.0,BEEF BONES KNUCKLE FZ,1.0,Kg,1.0,Kg,MEAT,41.3463,329.5,1677.2,61309.0
7,I-11661,1.0,"BEEF STEW 3/4"" FROZEN",5.0,Kg,1.0,Kg,MEAT,41.3463,329.5,1677.2,61309.0
8,I-3837,1.0,"BEEF STEW DICED 3/4""FR",1.0,Kg,1.0,Kg,MEAT,41.3463,329.5,1677.2,61309.0
9,I-43648,1.0,BRISKET BEEF CORN SMKD FR,4.5,Kg,1.0,Kg,MEAT,41.3463,329.5,1677.2,61309.0


In [42]:
# Load the preprocessed ingredients list (IngredientId, Qty, Uom, Conversion, InvFactor, Recipe).
ingredients = pd.read_csv("data/preprocessed/Ingredients_List.csv")
ingredients

Unnamed: 0,IngredientId,Qty,Uom,Conversion,InvFactor,Recipe
0,I-3643,225.0,g,0.001,0.1837,P-18907
1,I-6026,1000.0,g,1.0,0.8163,P-18907
2,I-3642,1.0,Kg,1000.0,0.0002,P-25993
3,I-6026,5.0,Kg,1000.0,0.0008,P-25993
4,I-1813,125.0,ml,0.033814,37.8788,P-26044
5,I-2612,2.25,Kg,1.0,0.6818,P-26044
6,I-3284,10.0,ml,0.002,3.0303,P-26044
7,I-3660,250.0,g,0.001,75.7576,P-26044
8,I-5983,625.0,ml,0.001,189.3939,P-26044
9,I-6820,60.0,g,0.001,18.1818,P-26044


In [43]:
# List the ingredient ids that have no row in the item mapping.
map_list = mapping["ItemId"].unique()
# Hash the mapped ids once so each membership test is a constant-time lookup rather
# than a scan of the whole array.
mapped_ids = set(map_list)
absent_list = [
    item
    for item in ingredients["IngredientId"].unique()
    if item not in mapped_ids
]

# NOTE(review): the printed ids are mostly prefixed "P-" / "R-"; presumably these are
# preps / recipes rather than purchased items and are expected to be absent — confirm.
print(absent_list)

['P-18907', 'P-50495', 'P-26058', 'P-25993', 'P-9714', 'P-9713', 'P-28285', 'P-50598', 'P-58949', 'P-26055', 'P-26069', 'P-34892', 'P-44592', 'P-50310', 'P-50316', 'P-50317', 'P-26057', 'P-46862', 'R-30406', 'P-32739', 'P-50739', 'P-50636', 'P-50337', 'P-26205', 'P-44587', 'P-44913', 'P-46076', 'P-26063', 'P-26044', 'P-50513', 'P-48933', 'P-26514', 'P-52036', 'P-26020', 'P-44896', 'P-29315', 'P-34085', 'P-26077', 'P-48489', 'P-26231', 'P-32661', 'P-38556', 'P-58359', 'P-26090', 'P-26460', 'P-41095', 'P-34123', 'P-34880', 'P-42499', 'P-44728', 'P-46869', 'P-50583', 'P-50584', 'P-7444', 'P-26196', 'P-41096', 'P-9741', 'P-9765', 'P-24452', 'P-57146', 'P-19175', 'P-50587', 'P-34534', 'P-50429', 'P-51536', 'P-6888', 'P-8196', 'P-22247', 'P-44652', 'P-21077', 'P-41468', 'P-41466', 'P-57227', 'P-46492', 'P-48870', 'P-57214', 'P-24750', 'P-43440', 'P-10113', 'P-51582', 'P-56459', 'P-9710', 'P-9712', 'P-56887', 'P-9745', 'P-39167', 'P-41094', 'P-56741', 'P-15353', 'P-33557', 'P-53277', 'P-22534

In [44]:
# Save the item-to-footprint-factor mapping to data/mapping/Mapping.csv (header written, row index omitted).
path = os.path.join(os.getcwd(), "data", "mapping", "Mapping.csv")
mapping.to_csv(path, index = False, header = True)