# Climate-Friendly Food Systems (CFFS) Labelling Project

### The University of British Columbia

***

## Part III: Update Information and Mapping

## Set up and Import Libraries

In [1]:
#pip install -r requirements.txt

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import glob
import os
import csv
from itertools import islice
from decimal import Decimal
import xml.etree.ElementTree as et
from xml.etree.ElementTree import parse
import openpyxl
import pytest
from datetime import datetime

In [2]:
# Move the working directory from the notebook's folder up one level to the
# repository folder, because the data paths used below are relative to it.
# os.chdir("../") is relative to the *current* directory, so running it a second
# time in the same kernel would climb one level too far. The REPO_ROOT guard
# makes this cell safe to re-run: the directory change happens only once per
# kernel session.
if "REPO_ROOT" not in globals():
    os.chdir("../")
    REPO_ROOT = os.getcwd()
path = os.getcwd()
print(path)

/Users/vivaanwadhwa/Documents/GitHub/CFFS_sharon_2024


***
## Import Preprocessed Datasets

In [3]:
# Load the prep list from data/cleaning/Preps_Unit_Cleaned.csv and preview it.
preps_csv = os.path.join(os.getcwd(), "data", "cleaning", "Preps_Unit_Cleaned.csv")
Preps = pd.read_csv(preps_csv)
Preps.head()

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup,StdQty,StdUom
0,P-61322,AIOLI|Pesto,4.0,L,ZDONT USE OK - PREP,4000.0,ml
1,P-26184,BAKED|Beans,9.0,Kg,PREP,9000.0,g
2,P-54644,BAKED|Moussaka,16.0,PTN,,16.0,PTN
3,P-54666,BAKED|Pasta|Chicken Alfredo,6.176,Kg,,6176.0,g
4,P-54664,BAKED|Pasta|Chorizo Penne,7.36,Kg,,7360.0,g


In [4]:
# Load the GHG emission factors (one row per food category) and preview them.
ghge_csv = os.path.join(os.getcwd(), "data", "external", "ghge_factors.csv")
ghge_factors = pd.read_csv(ghge_csv)
ghge_factors.head()

Unnamed: 0,Category ID,Food Category,Active Total Supply Chain Emissions (kg CO2 / kg food)
0,1,beef & buffalo meat,41.3463
1,2,lamb/mutton & goat meat,41.6211
2,3,pork (pig meat),9.8315
3,4,"poultry (chicken, turkey)",4.3996
4,5,butter,11.4316


In [5]:
# Load the nitrogen-loss factors (one row per food category) and preview them.
nitro_csv = os.path.join(os.getcwd(), "data", "external", "nitrogen_factors.csv")
nitro_factors = pd.read_csv(nitro_csv)
nitro_factors.head()

Unnamed: 0,Category ID,Food Category,g N lost/kg product
0,1,beef & buffalo meat,329.5
1,2,lamb/mutton & goat meat,231.15
2,3,pork (pig meat),132.8
3,4,"poultry (chicken, turkey)",116.8
4,5,butter,100.35


In [6]:
# Load the water-use factors (one row per food category) and preview them.
water_csv = os.path.join(os.getcwd(), "data", "external", "water_factors.csv")
water_factors = pd.read_csv(water_csv)
water_factors.head()

Unnamed: 0,Category ID,Food Category,Freshwater Withdrawals (L/FU),Stress-Weighted Water Use (L/FU)
0,1,beef & buffalo meat,1677.2,61309.0
1,2,lamb/mutton & goat meat,461.2,258.9
2,3,pork (pig meat),1810.3,54242.7
3,4,"poultry (chicken, turkey)",370.3,333.5
4,5,butter,1010.176,50055.168


In [7]:
# Load the land-use factors and expose the value column under the name that the
# rest of this notebook uses ('Land Use (m^2)'), scaled by 1000.
# NOTE(review): the source column is labelled km^2 per kg and the new label says
# m^2, yet the scale factor is 1000 (km^2 -> m^2 would be 1e6). Confirm the
# intended unit (possibly m^2 per gram of product) before relying on the label.
land_csv = os.path.join(os.getcwd(), "data", "external", "land_factors.csv")
land_factors = pd.read_csv(land_csv).rename(
    columns={'km^2 land use/kg product': 'Land Use (m^2)'}
)
land_factors['Land Use (m^2)'] = land_factors['Land Use (m^2)'] * 1000
land_factors.head()

Unnamed: 0,Category ID,Food Category,Land Use (m^2)
0,1,beef & buffalo meat,0.12645
1,2,lamb/mutton & goat meat,0.1432
2,3,pork (pig meat),0.02102
3,4,"poultry (chicken, turkey)",0.01151
4,5,butter,0.01395


In [8]:
# Current items list, each item already carrying its emission-factor CategoryID.
items_assigned_csv = os.path.join(os.getcwd(), "data", "mapping", "Items_List_Assigned.csv")
Items_Assigned = pd.read_csv(items_assigned_csv)
Items_Assigned.head()

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup
0,I-57545,1,CHUCK FLAT BONELESS FZN,3.3,Kg,1.0,Kg,MEAT
1,I-10869,1,BEEF STIRFRY COV FR,5.0,Kg,1.0,Kg,MEAT
2,I-7064,1,BEEF OUTSIDE FLAT AAA,1.0,Kg,1.0,Kg,MEAT
3,I-37005,1,BEEF MEATBALLS,4.54,Kg,1000.0,g,MEAT
4,I-37002,1,BEEF INSIDE ROUND SHAVED,9.0,Kg,1000.0,g,MEAT


In [9]:
# New items with their assigned CategoryID (2024-07-26 file).
# The same file is loaded again further down under the name New_Items_Added.
new_items_csv = "data/mapping/new items added/New_Items_2024/2024-07-26_New_Items_assigned.csv"
new_items = pd.read_csv(new_items_csv)
new_items

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category
0,I-72463,17.0,BEAN BLACK*READY TO EAT,6.0,pak,4.0,lb,FOOD - GROCERY,beans and pulses (dried)
1,I-72464,17.0,BEANS GARBANZO*READY TO EAT,6.0,pak,4.0,lb,FOOD - GROCERY,beans and pulses (dried)
2,I-72753,24.0,BRIOCHE BREAD BRAIDED SLCD,7.0,LOAF,10.0,slice,BREAD,"wheat/rye (bread, pasta, baked goods)"
3,I-73416,38.0,BROTH SHIO GARLIC PACKS,50.0,ea,1.0,ea,FOOD - GROCERY,root vegetables
4,I-73415,58.0,BROTH TAN TAN ORGANIC,3.0,JUG,2.5,L,FOOD - GROCERY,manually adjusted
5,I-70023,24.0,BUN BRIOCHE*HOTDOG 6 INCH,72.0,each,1.0,each,BREAD,"wheat/rye (bread, pasta, baked goods)"
6,I-72414,40.0,BURGER LIONS MANE,1.0,cs,1.0,cs,FOOD - GROCERY,manually adjusted
7,I-69728,17.0,BURRITO SPICY BEAN VEGAN,1.0,ea,1.0,ea,FOOD - GROCERY,beans and pulses (dried)
8,I-71937,6.0,CHEESE CHED*WHT OLD SHRED,4.0,bag,2.5,Kg,DAIRY,cheese
9,I-02341,6.0,CHEESE FETA*11KG,1.0,pail,11.0,Kg,DAIRY,cheese


In [10]:
# Recipe ingredient rows listed in data/cleaning/Items_Nonstd.csv.
item_nonstd_csv = "data/cleaning/Items_Nonstd.csv"
item_nonstd = pd.read_csv(item_nonstd_csv)
item_nonstd

Unnamed: 0,IngredientId,Qty,Uom,Conversion,InvFactor,Recipe,Description
0,I-11842,2.000,LOAF,1.0,0.8818,P-13648,SOURDOUGH LOAFCOUNTRY
1,I-1028,0.500,CT,1.0,0.5000,P-26170,NAAN ORIGINAL*TEARDROP
2,I-64492,1.000,LOAF,1.0,0.0625,P-26234,LOAF GARLIC BREAD
3,I-19735,1.000,CT,1.0,0.1000,P-26318,PASTRY PUFF 10X15 IN
4,I-15358,50.000,ea,1.0,55.1268,P-26393,TORTILLA CORN*WHTE 6IN EX THN
...,...,...,...,...,...,...,...
57,I-72414,0.025,cs,1.0,1.0000,R-73217,BURGER LIONS MANE
58,I-62928,1.000,ea,1.0,1.0000,R-73410,BAGEL EVERYTHING*SLCD
59,I-73417,1.000,ea,1.0,1.0000,R-73424,NOODLES RAMEN WHITE
60,I-73414,1.000,ea,1.0,1.0000,R-73425,NOODLES RAMEN KALE&SPINACH


In [11]:
# Preps listed in data/cleaning/Preps_NonstdUom.csv.
preps_nonstd_csv = "data/cleaning/Preps_NonstdUom.csv"
preps_nonstd = pd.read_csv(preps_nonstd_csv)
preps_nonstd

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup,StdQty,StdUom
0,P-72899,Crusted|Halibut|LM,50.0,PTN,,50.0,PTN
1,P-72896,Pistachio|Breaded|Tofu,100.0,PTN,,100.0,PTN
2,P-72885,Risotto|Cake|LM,45.0,PTN,,45.0,PTN


In [12]:
# Unit conversion table from data/cleaning/update/Conv_UpdateConv.csv.
conv_update_csv = "data/cleaning/update/Conv_UpdateConv.csv"
conv_updatecov = pd.read_csv(conv_update_csv)
conv_updatecov

Unnamed: 0,ConversionId,Multiplier,ConvertFromQty,ConvertFromUom,ConvertToQty,ConvertToUom
0,I-67659,0.008818,1.000,each,113.398,g
1,I-28697,0.005181,1.000,ea,193.000,g
2,I-47441,0.005181,1.000,ea,193.000,g
3,I-1905,0.012500,1.000,CT,80.000,g
4,I-47440,0.005051,1.000,ea,198.000,g
...,...,...,...,...,...,...
574,I-72414,0.000357,0.025,cs,70.000,g
575,I-62928,0.010526,1.000,ea,95.000,g
576,I-73417,0.007143,1.000,ea,140.000,g
577,I-73414,0.007143,1.000,ea,140.000,g


In [13]:
# Spot check: show the conversion rows (if any) whose ConversionId is P-54581.
conv_updatecov[conv_updatecov["ConversionId"] == "P-54581"]

Unnamed: 0,ConversionId,Multiplier,ConvertFromQty,ConvertFromUom,ConvertToQty,ConvertToUom


In [14]:
# Spot check: show the Preps row(s) whose PrepId is P-54581.
Preps[Preps["PrepId"] == "P-54581"]

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup,StdQty,StdUom
576,P-54581,SLICE|Multigrain Bread,22.0,slice,,22.0,slice


***
## Import Update Info

In [15]:
# Preps whose unit of measure had to be converted to a standard unit by hand.
# PakUOM values seen in the preview include: each, ea, slice.
manual_prep_csv = os.path.join(os.getcwd(), "data", "cleaning", "update", "Preps_UpdateUom.csv")
Manual_PrepU = pd.read_csv(manual_prep_csv)
Manual_PrepU.head()

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup,StdQty,StdUom
0,P-54697,LEMON|Wedge 1/8,8.0,each,PREP,84.0,g
1,P-35132,MARINATED|Lemon & Herb Chx,185.0,ea,PREP,24050.0,g
2,P-51992,YIELD|Bread|Sourdough 5/8,36.0,slice,,1620.0,g
3,P-26234,BATCH|Roasted Garlic Bread,16.0,ea,PREP,1280.0,g
4,P-26170,GRILLED|NaanBread,1.0,ea,PREP,125.0,g


In [16]:
# Spot check: the row(s) of Manual_PrepU whose PrepId is P-54581.
Manual_PrepU[Manual_PrepU["PrepId"].eq("P-54581")]

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup,StdQty,StdUom
212,P-54581,SLICE|Multigrain Bread,22.0,slice,,38.0,g


In [17]:
# Path of the new-items list that already has a CategoryID assigned; change this
# path to pick up a different batch of new items.
new_items_added_csv = "data/mapping/new items added/New_Items_2024/2024-07-26_New_Items_assigned.csv"
New_Items_Added = pd.read_csv(new_items_added_csv)
New_Items_Added.tail(15)

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category
30,I-72760,3.0,PEPPERONI SLICES*DRY,2.0,bag,3.4,Kg,MEAT,manually adjusted
31,I-01293,31.0,PINEAPPLE PIZZA CUT L/S TFC,6.0,LG CAN,2.84,L,FOOD - GROCERY,other fruits
32,I-26887,51.0,SAKE,1.0,BIB 20L,18.0,L,"PACKAGED BEER, CIDER",wine grapes (wine)
33,I-2699,58.0,SAUCE PESTO*SUPREME W/NUTS,6.0,POUCH,454.0,g,FOOD - GROCERY,sauces & paste
34,I-71070,58.0,SAUCE PILI CHILI VEGAN KULA,4.0,L,1.0,L,FOOD - GROCERY,sauces & paste
35,I-72222,54.0,SEASONING GREKO LEMON RST POTA,2.0,BG,2270.0,g,SPICES,stimulants & spices misc.
36,I-07630,20.0,TMRW BEEF*GRND VEGAN FZ,2.0,bag,5.0,lb,MEAT,manually adjusted
37,I-71932,20.0,TMRW PROTEIN*SHRED PLANTBASE,4.0,bag,2.5,lb,MEAT,manually adjusted
38,I-2953,57.0,VINEGAR BALSAMIC WHITE,5.0,L,1.0,L,FOOD - GROCERY,vinegar
39,I-62651,31.0,WATER COCONUT TETRA 1L,12.0,tetra,1.0,tetra,BEVERAGE,other fruits


In [19]:
# Import list of items whose GHGe factor was adjusted manually.
# NOTE: this cell is disabled (all code is commented out), so `Manual_Factor` is
# never defined here, yet the "manually adjust footprint factor" cell further
# down iterates over `Manual_Factor`. Re-enable this cell or remove that step.
# Manual_Factor = pd.read_csv(os.path.join(os.getcwd(), "data", "mapping", "Manual_Adjust_Factors.csv"))
# Manual_Factor['Land Use (m^2)'] = 0
# Manual_Factor.head()

### Update Correct Uom for Preps

In [20]:
# Overwrite StdQty / StdUom in Preps with the manually supplied values from
# Manual_PrepU, matching rows on PrepId.
#
# The update is vectorised: instead of scanning all of Preps twice for every
# manual row, the manual values are indexed by PrepId once and looked up with
# Series.map. If a PrepId occurs more than once in Manual_PrepU, the last row
# wins, which is what a sequential row-by-row update produces as well. Rows of
# Manual_PrepU with a missing PrepId are ignored (they can never match).
prep_overrides = (
    Manual_PrepU
    .dropna(subset=['PrepId'])
    .drop_duplicates(subset=['PrepId'], keep='last')
    .set_index('PrepId')
)
has_override = Preps['PrepId'].isin(prep_overrides.index)
for column in ('StdQty', 'StdUom'):
    Preps.loc[has_override, column] = (
        Preps.loc[has_override, 'PrepId'].map(prep_overrides[column])
    )

In [21]:
# Keep only the first row for each PrepId.
Preps = Preps.drop_duplicates(subset=['PrepId'])

In [22]:
# Preview Preps after the manual unit updates and the PrepId de-duplication.
Preps.head()

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup,StdQty,StdUom
0,P-61322,AIOLI|Pesto,4.0,L,ZDONT USE OK - PREP,4000.0,ml
1,P-26184,BAKED|Beans,9.0,Kg,PREP,9000.0,g
2,P-54644,BAKED|Moussaka,16.0,PTN,,6720.0,g
3,P-54666,BAKED|Pasta|Chicken Alfredo,6.176,Kg,,6176.0,g
4,P-54664,BAKED|Pasta|Chorizo Penne,7.36,Kg,,7360.0,g


In [23]:
# (rows, columns) of the de-duplicated prep list.
Preps.shape

(776, 7)

In [24]:
# Write the updated prep list to data/cleaning/Preps_List_Cleaned.csv,
# with a header row and without the DataFrame index.
cleaning_dir = os.path.join(os.getcwd(), "data", "cleaning")
path = os.path.join(cleaning_dir, "Preps_List_Cleaned.csv")
Preps.to_csv(path, header=True, index=False)

In [25]:
# Preview the existing items list (with CategoryID) before new items are appended.
Items_Assigned.head()

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup
0,I-57545,1,CHUCK FLAT BONELESS FZN,3.3,Kg,1.0,Kg,MEAT
1,I-10869,1,BEEF STIRFRY COV FR,5.0,Kg,1.0,Kg,MEAT
2,I-7064,1,BEEF OUTSIDE FLAT AAA,1.0,Kg,1.0,Kg,MEAT
3,I-37005,1,BEEF MEATBALLS,4.54,Kg,1000.0,g,MEAT
4,I-37002,1,BEEF INSIDE ROUND SHAVED,9.0,Kg,1000.0,g,MEAT


In [26]:
# Preview the new items that are about to be appended to the items list.
New_Items_Added.head()

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category
0,I-72463,17.0,BEAN BLACK*READY TO EAT,6.0,pak,4.0,lb,FOOD - GROCERY,beans and pulses (dried)
1,I-72464,17.0,BEANS GARBANZO*READY TO EAT,6.0,pak,4.0,lb,FOOD - GROCERY,beans and pulses (dried)
2,I-72753,24.0,BRIOCHE BREAD BRAIDED SLCD,7.0,LOAF,10.0,slice,BREAD,"wheat/rye (bread, pasta, baked goods)"
3,I-73416,38.0,BROTH SHIO GARLIC PACKS,50.0,ea,1.0,ea,FOOD - GROCERY,root vegetables
4,I-73415,58.0,BROTH TAN TAN ORGANIC,3.0,JUG,2.5,L,FOOD - GROCERY,manually adjusted


### Import List of New Items with Emission Factors Category ID Assigned

In [27]:
# Stack the existing items list and the new items, renumber the rows from 0,
# then drop rows that are exact duplicates across every column.
# NOTE(review): duplicates are detected on whole rows, not on ItemId, so an item
# present in both lists with any differing column value is kept twice here.
frames = [Items_Assigned, New_Items_Added]
Items_Assigned_Updated = pd.concat(frames, ignore_index=True).drop_duplicates()
Items_Assigned_Updated.head()

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category
0,I-57545,1.0,CHUCK FLAT BONELESS FZN,3.3,Kg,1.0,Kg,MEAT,
1,I-10869,1.0,BEEF STIRFRY COV FR,5.0,Kg,1.0,Kg,MEAT,
2,I-7064,1.0,BEEF OUTSIDE FLAT AAA,1.0,Kg,1.0,Kg,MEAT,
3,I-37005,1.0,BEEF MEATBALLS,4.54,Kg,1000.0,g,MEAT,
4,I-37002,1.0,BEEF INSIDE ROUND SHAVED,9.0,Kg,1000.0,g,MEAT,


In [28]:
# Display the combined items list (existing items followed by the new items).
Items_Assigned_Updated

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category
0,I-57545,1.0,CHUCK FLAT BONELESS FZN,3.300,Kg,1.0,Kg,MEAT,
1,I-10869,1.0,BEEF STIRFRY COV FR,5.000,Kg,1.0,Kg,MEAT,
2,I-7064,1.0,BEEF OUTSIDE FLAT AAA,1.000,Kg,1.0,Kg,MEAT,
3,I-37005,1.0,BEEF MEATBALLS,4.540,Kg,1000.0,g,MEAT,
4,I-37002,1.0,BEEF INSIDE ROUND SHAVED,9.000,Kg,1000.0,g,MEAT,
...,...,...,...,...,...,...,...,...,...
2878,I-09837,55.0,WATER FOR RECIPES,1.000,L,1.0,L,BEVERAGE,water & beverages
2879,I-72924,20.0,WAYGU TERIYAKI*STRIP PLNTBASED,6.000,bag,1.0,Kg,MEAT,manually adjusted
2880,I-73324,20.0,WAYGU UMAMI* STRIP PLNTBASED,6.000,bag,1.0,Kg,MEAT,manually adjusted
2881,I-62559,54.0,YEAST FLAKE,3.307,lb,1.0,lb,FOOD - GROCERY,stimulants & spices misc.


In [29]:
# (rows, columns) of the combined items list.
Items_Assigned_Updated.shape

(2883, 9)

In [30]:
# Convert the CategoryID column to a numeric dtype with pd.to_numeric so it can
# be used as the join key against the numeric 'Category ID' of the factor tables.
Items_Assigned_Updated['CategoryID'] = pd.to_numeric(Items_Assigned_Updated['CategoryID'])

In [31]:
# Display the combined items list after CategoryID was converted to numeric.
Items_Assigned_Updated

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category
0,I-57545,1.0,CHUCK FLAT BONELESS FZN,3.300,Kg,1.0,Kg,MEAT,
1,I-10869,1.0,BEEF STIRFRY COV FR,5.000,Kg,1.0,Kg,MEAT,
2,I-7064,1.0,BEEF OUTSIDE FLAT AAA,1.000,Kg,1.0,Kg,MEAT,
3,I-37005,1.0,BEEF MEATBALLS,4.540,Kg,1000.0,g,MEAT,
4,I-37002,1.0,BEEF INSIDE ROUND SHAVED,9.000,Kg,1000.0,g,MEAT,
...,...,...,...,...,...,...,...,...,...
2878,I-09837,55.0,WATER FOR RECIPES,1.000,L,1.0,L,BEVERAGE,water & beverages
2879,I-72924,20.0,WAYGU TERIYAKI*STRIP PLNTBASED,6.000,bag,1.0,Kg,MEAT,manually adjusted
2880,I-73324,20.0,WAYGU UMAMI* STRIP PLNTBASED,6.000,bag,1.0,Kg,MEAT,manually adjusted
2881,I-62559,54.0,YEAST FLAKE,3.307,lb,1.0,lb,FOOD - GROCERY,stimulants & spices misc.


In [32]:
# Save the combined items list, with a header row and without the index.
# Note that the target is data/mapping/Items_List_Assigned.csv, i.e. the same
# file Items_Assigned was loaded from, so this overwrites the notebook's input.
mapping_dir = os.path.join(os.getcwd(), "data", "mapping")
path = os.path.join(mapping_dir, "Items_List_Assigned.csv")
Items_Assigned_Updated.to_csv(path, header=True, index=False)

## Mapping Items to Footprint Factors

In [34]:
# Display the items list that the footprint factors are mapped onto below.
Items_Assigned_Updated

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category
0,I-57545,1.0,CHUCK FLAT BONELESS FZN,3.300,Kg,1.0,Kg,MEAT,
1,I-10869,1.0,BEEF STIRFRY COV FR,5.000,Kg,1.0,Kg,MEAT,
2,I-7064,1.0,BEEF OUTSIDE FLAT AAA,1.000,Kg,1.0,Kg,MEAT,
3,I-37005,1.0,BEEF MEATBALLS,4.540,Kg,1000.0,g,MEAT,
4,I-37002,1.0,BEEF INSIDE ROUND SHAVED,9.000,Kg,1000.0,g,MEAT,
...,...,...,...,...,...,...,...,...,...
2878,I-09837,55.0,WATER FOR RECIPES,1.000,L,1.0,L,BEVERAGE,water & beverages
2879,I-72924,20.0,WAYGU TERIYAKI*STRIP PLNTBASED,6.000,bag,1.0,Kg,MEAT,manually adjusted
2880,I-73324,20.0,WAYGU UMAMI* STRIP PLNTBASED,6.000,bag,1.0,Kg,MEAT,manually adjusted
2881,I-62559,54.0,YEAST FLAKE,3.307,lb,1.0,lb,FOOD - GROCERY,stimulants & spices misc.


In [37]:
# DEFINITION OF MAPPING: assigning attributes to data points based on predefined
# rules. Here each item receives the footprint factor of its assigned category.

# Map GHG footprint factors.
ghge_col = 'Active Total Supply Chain Emissions (kg CO2 / kg food)'

# Left join of Items_Assigned_Updated (key: 'CategoryID') with ghge_factors
# (key: 'Category ID'). how='left' keeps every item; an item whose CategoryID
# has no match in ghge_factors gets NaN in the columns coming from ghge_factors.
# Both frames contain a 'Food Category' column, so pandas suffixes them:
# 'Food Category_x' comes from the items list, 'Food Category_y' from ghge_factors.
mapping = pd.merge(
    Items_Assigned_Updated,
    ghge_factors.loc[:, ['Category ID', 'Food Category', ghge_col]],
    how='left',
    left_on='CategoryID',
    right_on='Category ID',
)

# Items without a matching category (NaN in the joined 'Category ID') get an
# emission factor of 0. One vectorised mask replaces the row-by-row loop.
mapping.loc[mapping['Category ID'].isna(), ghge_col] = 0

# Show the full join result, then drop the join key and the items-list copy of
# 'Food Category'; 'Food Category_y' (from ghge_factors) is kept.
display(mapping)
mapping = mapping.drop(columns=['Category ID', 'Food Category_x'])
mapping

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category_x,Category ID,Food Category_y,Active Total Supply Chain Emissions (kg CO2 / kg food)
0,I-57545,1.0,CHUCK FLAT BONELESS FZN,3.300,Kg,1.0,Kg,MEAT,,1.0,beef & buffalo meat,41.3463
1,I-10869,1.0,BEEF STIRFRY COV FR,5.000,Kg,1.0,Kg,MEAT,,1.0,beef & buffalo meat,41.3463
2,I-7064,1.0,BEEF OUTSIDE FLAT AAA,1.000,Kg,1.0,Kg,MEAT,,1.0,beef & buffalo meat,41.3463
3,I-37005,1.0,BEEF MEATBALLS,4.540,Kg,1000.0,g,MEAT,,1.0,beef & buffalo meat,41.3463
4,I-37002,1.0,BEEF INSIDE ROUND SHAVED,9.000,Kg,1000.0,g,MEAT,,1.0,beef & buffalo meat,41.3463
...,...,...,...,...,...,...,...,...,...,...,...,...
2878,I-09837,55.0,WATER FOR RECIPES,1.000,L,1.0,L,BEVERAGE,water & beverages,55.0,water & beverages,0.0000
2879,I-72924,20.0,WAYGU TERIYAKI*STRIP PLNTBASED,6.000,bag,1.0,Kg,MEAT,manually adjusted,20.0,soybeans/tofu,1.7542
2880,I-73324,20.0,WAYGU UMAMI* STRIP PLNTBASED,6.000,bag,1.0,Kg,MEAT,manually adjusted,20.0,soybeans/tofu,1.7542
2881,I-62559,54.0,YEAST FLAKE,3.307,lb,1.0,lb,FOOD - GROCERY,stimulants & spices misc.,54.0,stimulants & spices misc.,9.3703


Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category_y,Active Total Supply Chain Emissions (kg CO2 / kg food)
0,I-57545,1.0,CHUCK FLAT BONELESS FZN,3.300,Kg,1.0,Kg,MEAT,beef & buffalo meat,41.3463
1,I-10869,1.0,BEEF STIRFRY COV FR,5.000,Kg,1.0,Kg,MEAT,beef & buffalo meat,41.3463
2,I-7064,1.0,BEEF OUTSIDE FLAT AAA,1.000,Kg,1.0,Kg,MEAT,beef & buffalo meat,41.3463
3,I-37005,1.0,BEEF MEATBALLS,4.540,Kg,1000.0,g,MEAT,beef & buffalo meat,41.3463
4,I-37002,1.0,BEEF INSIDE ROUND SHAVED,9.000,Kg,1000.0,g,MEAT,beef & buffalo meat,41.3463
...,...,...,...,...,...,...,...,...,...,...
2878,I-09837,55.0,WATER FOR RECIPES,1.000,L,1.0,L,BEVERAGE,water & beverages,0.0000
2879,I-72924,20.0,WAYGU TERIYAKI*STRIP PLNTBASED,6.000,bag,1.0,Kg,MEAT,soybeans/tofu,1.7542
2880,I-73324,20.0,WAYGU UMAMI* STRIP PLNTBASED,6.000,bag,1.0,Kg,MEAT,soybeans/tofu,1.7542
2881,I-62559,54.0,YEAST FLAKE,3.307,lb,1.0,lb,FOOD - GROCERY,stimulants & spices misc.,9.3703


In [38]:
# Map nitrogen footprint factors.
nitro_col = 'g N lost/kg product'

# Left join on the category id. Only the join key and the factor column are
# taken from nitro_factors; its 'Food Category' column is not needed because
# the mapping result does not keep it.
mapping = pd.merge(
    mapping,
    nitro_factors.loc[:, ['Category ID', nitro_col]],
    how='left',
    left_on='CategoryID',
    right_on='Category ID',
)

# Items without a matching category (NaN in the joined 'Category ID') get a
# factor of 0. One vectorised mask replaces the row-by-row loop.
mapping.loc[mapping['Category ID'].isna(), nitro_col] = 0

# The join key from the factor table is no longer needed.
mapping = mapping.drop(columns=['Category ID'])
mapping

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category_y,Active Total Supply Chain Emissions (kg CO2 / kg food),g N lost/kg product
0,I-57545,1.0,CHUCK FLAT BONELESS FZN,3.300,Kg,1.0,Kg,MEAT,beef & buffalo meat,41.3463,329.50
1,I-10869,1.0,BEEF STIRFRY COV FR,5.000,Kg,1.0,Kg,MEAT,beef & buffalo meat,41.3463,329.50
2,I-7064,1.0,BEEF OUTSIDE FLAT AAA,1.000,Kg,1.0,Kg,MEAT,beef & buffalo meat,41.3463,329.50
3,I-37005,1.0,BEEF MEATBALLS,4.540,Kg,1000.0,g,MEAT,beef & buffalo meat,41.3463,329.50
4,I-37002,1.0,BEEF INSIDE ROUND SHAVED,9.000,Kg,1000.0,g,MEAT,beef & buffalo meat,41.3463,329.50
...,...,...,...,...,...,...,...,...,...,...,...
2878,I-09837,55.0,WATER FOR RECIPES,1.000,L,1.0,L,BEVERAGE,water & beverages,0.0000,0.00
2879,I-72924,20.0,WAYGU TERIYAKI*STRIP PLNTBASED,6.000,bag,1.0,Kg,MEAT,soybeans/tofu,1.7542,5.90
2880,I-73324,20.0,WAYGU UMAMI* STRIP PLNTBASED,6.000,bag,1.0,Kg,MEAT,soybeans/tofu,1.7542,5.90
2881,I-62559,54.0,YEAST FLAKE,3.307,lb,1.0,lb,FOOD - GROCERY,stimulants & spices misc.,9.3703,6.75


In [39]:
# Map land footprint factors.
land_col = 'Land Use (m^2)'

# Left join on the category id. Only the join key and the factor column are
# taken from land_factors; its 'Food Category' column is not needed because
# the mapping result does not keep it.
mapping = pd.merge(
    mapping,
    land_factors.loc[:, ['Category ID', land_col]],
    how='left',
    left_on='CategoryID',
    right_on='Category ID',
)

# Items without a matching category (NaN in the joined 'Category ID') get a
# factor of 0. One vectorised mask replaces the row-by-row loop.
mapping.loc[mapping['Category ID'].isna(), land_col] = 0

# The join key from the factor table is no longer needed.
mapping = mapping.drop(columns=['Category ID'])
mapping

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category_y,Active Total Supply Chain Emissions (kg CO2 / kg food),g N lost/kg product,Land Use (m^2)
0,I-57545,1.0,CHUCK FLAT BONELESS FZN,3.300,Kg,1.0,Kg,MEAT,beef & buffalo meat,41.3463,329.50,0.12645
1,I-10869,1.0,BEEF STIRFRY COV FR,5.000,Kg,1.0,Kg,MEAT,beef & buffalo meat,41.3463,329.50,0.12645
2,I-7064,1.0,BEEF OUTSIDE FLAT AAA,1.000,Kg,1.0,Kg,MEAT,beef & buffalo meat,41.3463,329.50,0.12645
3,I-37005,1.0,BEEF MEATBALLS,4.540,Kg,1000.0,g,MEAT,beef & buffalo meat,41.3463,329.50,0.12645
4,I-37002,1.0,BEEF INSIDE ROUND SHAVED,9.000,Kg,1000.0,g,MEAT,beef & buffalo meat,41.3463,329.50,0.12645
...,...,...,...,...,...,...,...,...,...,...,...,...
2878,I-09837,55.0,WATER FOR RECIPES,1.000,L,1.0,L,BEVERAGE,water & beverages,0.0000,0.00,0.00000
2879,I-72924,20.0,WAYGU TERIYAKI*STRIP PLNTBASED,6.000,bag,1.0,Kg,MEAT,soybeans/tofu,1.7542,5.90,0.00414
2880,I-73324,20.0,WAYGU UMAMI* STRIP PLNTBASED,6.000,bag,1.0,Kg,MEAT,soybeans/tofu,1.7542,5.90,0.00414
2881,I-62559,54.0,YEAST FLAKE,3.307,lb,1.0,lb,FOOD - GROCERY,stimulants & spices misc.,9.3703,6.75,0.03100


In [40]:
# Map water footprint factors.
# After this cell `mapping` holds, per item, the GHG, nitrogen, land and water
# factors of the item's assigned category.
water_cols = ['Freshwater Withdrawals (L/FU)', 'Stress-Weighted Water Use (L/FU)']

# Left join on the category id. Only the join key and the two factor columns are
# taken from water_factors; its 'Food Category' column is not needed because
# the mapping result does not keep it.
mapping = pd.merge(
    mapping,
    water_factors.loc[:, ['Category ID'] + water_cols],
    how='left',
    left_on='CategoryID',
    right_on='Category ID',
)

# Items without a matching category (NaN in the joined 'Category ID') get 0 for
# both water factors. One vectorised mask replaces the row-by-row loop.
mapping.loc[mapping['Category ID'].isna(), water_cols] = 0

# Drop the join key, then keep only the first row for each ItemId.
mapping = mapping.drop(columns=['Category ID'])
mapping = mapping.drop_duplicates(subset=["ItemId"])
mapping

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category_y,Active Total Supply Chain Emissions (kg CO2 / kg food),g N lost/kg product,Land Use (m^2),Freshwater Withdrawals (L/FU),Stress-Weighted Water Use (L/FU)
0,I-57545,1.0,CHUCK FLAT BONELESS FZN,3.300,Kg,1.0,Kg,MEAT,beef & buffalo meat,41.3463,329.50,0.12645,1677.200,61309.000
1,I-10869,1.0,BEEF STIRFRY COV FR,5.000,Kg,1.0,Kg,MEAT,beef & buffalo meat,41.3463,329.50,0.12645,1677.200,61309.000
2,I-7064,1.0,BEEF OUTSIDE FLAT AAA,1.000,Kg,1.0,Kg,MEAT,beef & buffalo meat,41.3463,329.50,0.12645,1677.200,61309.000
3,I-37005,1.0,BEEF MEATBALLS,4.540,Kg,1000.0,g,MEAT,beef & buffalo meat,41.3463,329.50,0.12645,1677.200,61309.000
4,I-37002,1.0,BEEF INSIDE ROUND SHAVED,9.000,Kg,1000.0,g,MEAT,beef & buffalo meat,41.3463,329.50,0.12645,1677.200,61309.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2878,I-09837,55.0,WATER FOR RECIPES,1.000,L,1.0,L,BEVERAGE,water & beverages,0.0000,0.00,0.00000,1.000,1.000
2879,I-72924,20.0,WAYGU TERIYAKI*STRIP PLNTBASED,6.000,bag,1.0,Kg,MEAT,soybeans/tofu,1.7542,5.90,0.00414,6.600,32.400
2880,I-73324,20.0,WAYGU UMAMI* STRIP PLNTBASED,6.000,bag,1.0,Kg,MEAT,soybeans/tofu,1.7542,5.90,0.00414,6.600,32.400
2881,I-62559,54.0,YEAST FLAKE,3.307,lb,1.0,lb,FOOD - GROCERY,stimulants & spices misc.,9.3703,6.75,0.03100,24.900,220.300


In [41]:
# Spot check: the mapped factors for item I-4524.
mapping.loc[mapping["ItemId"] == "I-4524"]

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category_y,Active Total Supply Chain Emissions (kg CO2 / kg food),g N lost/kg product,Land Use (m^2),Freshwater Withdrawals (L/FU),Stress-Weighted Water Use (L/FU)
2006,I-4524,38.0,BEETS GOLDBC,5.0,lb,1.0,lb,PRODUCE,root vegetables,0.3062,7.9,0.00032,9.9,37.9


In [42]:
# Number of items that have no CategoryID assigned.
mapping["CategoryID"].isna().sum()

0

### Manually Adjust Footprint Factor for Specific Items

In [43]:
# Apply manually adjusted footprint factors.
# For every row of Manual_Factor, the factor columns of the rows in `mapping`
# with the same ItemId are overwritten with the manual values. Only `mapping`
# is modified; Manual_Factor itself is left untouched.
#
# The cell that loads Manual_Factor is commented out in this notebook, so the
# frame may not exist. In that case this step is skipped with a message instead
# of failing with a NameError on a fresh "Restart & Run All".
adjusted_columns = [
    'Active Total Supply Chain Emissions (kg CO2 / kg food)',
    'g N lost/kg product',
    'Freshwater Withdrawals (L/FU)',
    'Stress-Weighted Water Use (L/FU)',
    'Land Use (m^2)',
]

try:
    manual_factors = Manual_Factor
except NameError:
    manual_factors = None
    print("Manual_Factor is not loaded - skipping manual footprint adjustments.")

if manual_factors is not None:
    for _, manual_row in manual_factors.iterrows():
        # Build the ItemId mask once per manual row and reuse it for every column.
        is_target = mapping['ItemId'] == manual_row['ItemId']
        for column in adjusted_columns:
            mapping.loc[is_target, column] = manual_row[column]

In [44]:
# Keep only the first row for each ItemId, then list the column dtypes.
mapping = mapping.drop_duplicates(subset=['ItemId'])
mapping.dtypes

ItemId                                                     object
CategoryID                                                float64
Description                                                object
CaseQty                                                   float64
CaseUOM                                                    object
PakQty                                                    float64
PakUOM                                                     object
InventoryGroup                                             object
Food Category_y                                            object
Active Total Supply Chain Emissions (kg CO2 / kg food)    float64
g N lost/kg product                                       float64
Land Use (m^2)                                            float64
Freshwater Withdrawals (L/FU)                             float64
Stress-Weighted Water Use (L/FU)                          float64
dtype: object

In [45]:
# (rows, columns) of the final mapping table.
mapping.shape

(2883, 14)

In [46]:
# Display the mapping table (items with their footprint factors).
mapping

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category_y,Active Total Supply Chain Emissions (kg CO2 / kg food),g N lost/kg product,Land Use (m^2),Freshwater Withdrawals (L/FU),Stress-Weighted Water Use (L/FU)
0,I-57545,1.0,CHUCK FLAT BONELESS FZN,3.300,Kg,1.0,Kg,MEAT,beef & buffalo meat,41.3463,329.50,0.12645,1677.200,61309.000
1,I-10869,1.0,BEEF STIRFRY COV FR,5.000,Kg,1.0,Kg,MEAT,beef & buffalo meat,41.3463,329.50,0.12645,1677.200,61309.000
2,I-7064,1.0,BEEF OUTSIDE FLAT AAA,1.000,Kg,1.0,Kg,MEAT,beef & buffalo meat,41.3463,329.50,0.12645,1677.200,61309.000
3,I-37005,1.0,BEEF MEATBALLS,4.540,Kg,1000.0,g,MEAT,beef & buffalo meat,41.3463,329.50,0.12645,1677.200,61309.000
4,I-37002,1.0,BEEF INSIDE ROUND SHAVED,9.000,Kg,1000.0,g,MEAT,beef & buffalo meat,41.3463,329.50,0.12645,1677.200,61309.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2878,I-09837,55.0,WATER FOR RECIPES,1.000,L,1.0,L,BEVERAGE,water & beverages,0.0000,0.00,0.00000,1.000,1.000
2879,I-72924,20.0,WAYGU TERIYAKI*STRIP PLNTBASED,6.000,bag,1.0,Kg,MEAT,soybeans/tofu,1.7542,5.90,0.00414,6.600,32.400
2880,I-73324,20.0,WAYGU UMAMI* STRIP PLNTBASED,6.000,bag,1.0,Kg,MEAT,soybeans/tofu,1.7542,5.90,0.00414,6.600,32.400
2881,I-62559,54.0,YEAST FLAKE,3.307,lb,1.0,lb,FOOD - GROCERY,stimulants & spices misc.,9.3703,6.75,0.03100,24.900,220.300


In [47]:
# Ingredient rows from data/preprocessed/Ingredients_List.csv; used below to
# check that every ingredient item has a row in `mapping`.
ingredients_csv = "data/preprocessed/Ingredients_List.csv"
ingredients = pd.read_csv(ingredients_csv)
ingredients

Unnamed: 0,IngredientId,Qty,Uom,Conversion,InvFactor,Recipe
0,P-48933,1.00,Kg,1.000000,1.0000,P-10113
1,P-18746,1.00,Kg,1.000000,1.0000,P-10241
2,I-3388,1.00,L,1.000000,0.3058,P-10496
3,I-4658,2.27,Kg,2.204620,0.6942,P-10496
4,I-12176,4.00,g,0.001000,0.3636,P-12112
...,...,...,...,...,...,...
5107,I-7787,50.00,ml,0.001000,1.0000,R-73430
5108,I-8281,50.00,g,0.001000,1.0000,R-73430
5109,P-38661,25.00,mg,0.000001,1.0000,R-73430
5110,P-73445,150.00,g,0.001000,1.0000,R-73452


In [49]:
# Find the ingredient ids starting with "I-" that appear in `ingredients` but
# have no row in `mapping`, i.e. items that still lack footprint factors.
map_list = mapping["ItemId"].unique()
# A set gives constant-time membership tests; testing against the NumPy array
# would scan the whole array for every ingredient.
mapped_ids = set(map_list)

absent_list = [
    item
    for item in ingredients["IngredientId"].unique()
    # The isinstance guard skips missing ids (read from CSV as float NaN), which
    # have no .startswith and would otherwise raise AttributeError.
    if isinstance(item, str) and item.startswith("I-") and item not in mapped_ids
]

# absent_list contains the IngredientIds that are not present in mapping but are present in the ingredients DataFrame
print(absent_list)

[]


In [50]:
# Write the mapping table to data/mapping/Mapping.csv, with a header row and
# without the DataFrame index.
mapping_dir = os.path.join(os.getcwd(), "data", "mapping")
path = os.path.join(mapping_dir, "Mapping.csv")
mapping.to_csv(path, header=True, index=False)