# Climate-Friendly Food Systems (CFFS) Labelling Project

### The University of British Columbia

***

## Part III: Update Information and Mapping

## Set up and Import Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import  os
import xml.etree.ElementTree as et
from xml.etree.ElementTree import parse
from datetime import datetime

In [2]:
# Move the working directory up one level to the repo folder so that the relative
# "data/..." paths used by later cells resolve.
# Previously this cell had to be run only once: each re-run climbed one more
# directory. The flag below makes it safe to re-run within a kernel session.
# A kernel restart clears the flag together with the working directory, so a
# fresh run still moves up exactly once.
if "_REPO_ROOT" not in globals():
    os.chdir("../")  # the repo folder is one level above where this file exists
    _REPO_ROOT = os.getcwd()
else:
    # Re-run: return to the folder recorded on the first run instead of climbing again.
    os.chdir(_REPO_ROOT)
path = _REPO_ROOT
print(path)

/Users/vivaanwadhwa/Documents/GitHub/CFFS_sharon_2024


***
## Import Preprocessed Datasets

In [3]:
# Read Preps_Unit_Cleaned.csv from data/cleaning/AMS_data (relative to the
# current working directory) and preview the first rows.
preps_csv = os.path.join(os.getcwd(), "data", "cleaning", "AMS_data", "Preps_Unit_Cleaned.csv")
Preps = pd.read_csv(preps_csv)
Preps.head()

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup,StdQty,StdUom
0,P-6068,Pro - Kansas City BBQ,1.25,L,Y,1250.0,ml
1,P-2824,2023 Alfredo Sauce Gal.,2250.0,ml,Y,2250.0,ml
2,P-1409,2023 Basmati Prep,2.0,Kg,Y,2000.0,g
3,P-17360,2023 Beef Gravy (prep),4.5,L,Y,4500.0,ml
4,P-16778,2023 Candied walnut,1.0,Kg,Y,1000.0,g


In [4]:
# Read the greenhouse-gas emission factor table (one row per food category)
# from data/external and preview it.
ghge_csv = os.path.join(os.getcwd(), "data", "external", "ghge_factors.csv")
ghge_factors = pd.read_csv(ghge_csv)
ghge_factors.head()

Unnamed: 0,Category ID,Food Category,Active Total Supply Chain Emissions (kg CO2 / kg food)
0,1,beef & buffalo meat,41.3463
1,2,lamb/mutton & goat meat,41.6211
2,3,pork (pig meat),9.8315
3,4,"poultry (chicken, turkey)",4.3996
4,5,butter,11.4316


In [5]:
# Read the nitrogen-loss factor table (one row per food category)
# from data/external and preview it.
nitro_csv = os.path.join(os.getcwd(), "data", "external", "nitrogen_factors.csv")
nitro_factors = pd.read_csv(nitro_csv)
nitro_factors.head()

Unnamed: 0,Category ID,Food Category,g N lost/kg product
0,1,beef & buffalo meat,329.5
1,2,lamb/mutton & goat meat,231.15
2,3,pork (pig meat),132.8
3,4,"poultry (chicken, turkey)",116.8
4,5,butter,100.35


In [6]:
# Read the water-use factor table (freshwater withdrawals and stress-weighted
# water use per food category) from data/external and preview it.
water_csv = os.path.join(os.getcwd(), "data", "external", "water_factors.csv")
water_factors = pd.read_csv(water_csv)
water_factors.head()

Unnamed: 0,Category ID,Food Category,Freshwater Withdrawals (L/FU),Stress-Weighted Water Use (L/FU)
0,1,beef & buffalo meat,1677.2,61309.0
1,2,lamb/mutton & goat meat,461.2,258.9
2,3,pork (pig meat),1810.3,54242.7
3,4,"poultry (chicken, turkey)",370.3,333.5
4,5,butter,1010.176,50055.168


In [7]:
# Read the land-use factor table from data/external.
land_factors = pd.read_csv(os.path.join(os.getcwd(), "data", "external", "land_factors.csv"))
# Relabel the land-use column, then scale its values by 1000.
# NOTE(review): the source header says km^2 and the new label says m^2, but
# km^2 -> m^2 is a factor of 1,000,000, not 1,000. Confirm the real unit of the
# CSV column and the scale expected downstream before relying on this label.
land_factors.rename(columns={'km^2 land use/kg product': 'Land Use (m^2)'}, inplace=True)
land_factors['Land Use (m^2)'] *= 1000
land_factors.head()

Unnamed: 0,Category ID,Food Category,Land Use (m^2)
0,1,beef & buffalo meat,0.12645
1,2,lamb/mutton & goat meat,0.1432
2,3,pork (pig meat),0.02102
3,4,"poultry (chicken, turkey)",0.01151
4,5,butter,0.01395


In [8]:
# Read the current items list, which already carries an emission-factor
# CategoryID per item, from data/mapping/AMS_data and preview it.
items_assigned_csv = os.path.join(os.getcwd(), "data", "mapping", "AMS_data", "Items_List_Assigned.csv")
Items_Assigned = pd.read_csv(items_assigned_csv)
Items_Assigned.head()

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category
0,I-1971,35,LIMES,0.25,ea,1.0,ea,N,
1,I-8228,55,Grey Goose 1.14L,1.0,fl oz,1.0,fl oz,N,
2,I-15803,31,Red Bull Watermelon,1.0,can,1.0,can,N,
3,I-5505,36,Lettuce - Romaine,0.25,HEAD,1.0,HEAD,N,
4,I-8667,58,ITEM GARLIC MAYO,2.0,fl oz,1.0,ml,N,


In [9]:
# Load the latest batch of new items and preview it.
# Update the path below whenever a new batch is processed. Earlier batches were read from:
#   data/mapping/new items added/New_Items_Added_11.csv
#   data/mapping/new items added/New_Items_2023/New_Items_Added_2023_08_01.csv
# If a run has no new items, comment out the two lines below.
# NOTE(review): new_items is not referenced again in the cells shown here; the
# categorised version is loaded separately as New_Items_Added.

new_items = pd.read_csv("data/mapping/AMS_data/new items/2024-07-09_New_Items.csv")
new_items.head()

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup
0,I-4105,,SPICE GARLIC GRANULATED,5.0,g,1.25,L,Y
1,I-4106,,SPICE Pepper Black Grnd,10.0,g,1.25,L,Y
2,I-4107,,SPICE Nutmeg ground,2.0,g,2250.0,ml,Y
3,I-4108,,GRAVY MIX TRIO BROWN 1 STEP,490.0,g,4.5,L,Y
4,I-4109,,ORANGES 5lb,0.5,ea,1.0,ea,Y


In [10]:
# Read Items_Nonstd.csv (ingredient rows with their Qty/Uom and owning recipe)
# and display it.
item_nonstd_csv = "data/cleaning/AMS_data/Items_Nonstd.csv"
item_nonstd = pd.read_csv(item_nonstd_csv)
item_nonstd

Unnamed: 0,IngredientId,Qty,Uom,Recipe,Description
0,I-14127,1.0,ea,R-14296,PITA POCKETS THIN
1,I-14126,1.0,ea,R-14296,"PITA THICK 5"""
2,I-2087,0.01,bunch,R-14296,PARSLEY
3,I-2121,1.0,slice,R-7065,PICKLE DILL SANDW LONG SLCD
4,I-2992,2.0,slice,R-7065,Bacon Pre-Ckd 30-34 ct
5,I-3498,1.0,slice,R-7065,Cheese Cheddar Slices 21gm
6,I-14948,1.0,ea,R-7065,Brioche Hamburger Bun
7,I-13813,0.09,LBS,R-4598,Tomato Baby Gem Mix HH
8,I-4207,1.0,ea,R-1916,Belgian Waffle Ind.Wrapped 70g
9,I-4172,0.05,ea,R-7572,Bread Focaccia Slab 10x16


In [11]:
# Read Preps_NonstdUom.csv (preps whose StdQty/StdUom are still blank in the
# recorded output) and display it.
preps_nonstd_csv = "data/cleaning/AMS_data/Preps_NonstdUom.csv"
preps_nonstd = pd.read_csv(preps_nonstd_csv)
preps_nonstd

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup,StdQty,StdUom
0,P-18275,2023 Beets Salad Prep,1.0,ea,Y,,
1,P-5506,2023 Blackened Carbonara Prep,1.0,ea,Y,,
2,P-8990,2023 Chicken Caesar Prep,1.0,ea,Y,,
3,P-14560,2023 Chicken Caesar wrap Prep,1.0,PORT,Y,,
4,P-10589,2023 Chicken Po'Boy Prep,1.0,PORT,Y,,
5,P-18349,2023 Cooked Linguini,80.0,PORT,Y,,
6,P-11706,2023 Gallery Fries (Side),1.0,ea,Y,,
7,P-5012,2023 Gallery Nachos Prep (Sml),1.0,ea,Y,,
8,P-16794,2023 Grilled Pineapple prep,15.0,PORT,Y,,
9,P-4657,2023 Jasmine Rice (Prep),12.0,PORT,Y,,


In [12]:
# Read Conv_UpdateConv.csv (unit conversions: multiplier plus from/to quantity
# and unit for each ConversionId) and display it.
conv_update_csv = "data/cleaning/update/AMS_data/Conv_UpdateConv.csv"
conv_updatecov = pd.read_csv(conv_update_csv)
conv_updatecov

Unnamed: 0,ConversionId,Multiplier,ConvertFromQty,ConvertFromUom,ConvertToQty,ConvertToUom
0,I-1971,0.003731,0.25,ea,16.75,g
1,I-15803,0.004000,1.00,can,250.00,ml
2,I-5505,0.001263,0.25,HEAD,49.50,g
3,I-11706,0.008547,1.00,ea,117.00,g
4,I-13308,0.024540,1.00,ea,40.75,g
...,...,...,...,...,...,...
158,I-6203,0.000529,1.00,1.89L,1890.00,ml
159,I-4128,0.002205,50.00,LBS,22679.50,g
160,I-4226,0.002205,50.00,LBS,22679.50,g
161,I-4234,0.002000,3.00,HEAD,1500.00,g


# Update Conversions_Added.csv

In [13]:
# Read the Conversions_Added.csv master file from data/cleaning/update/AMS_data
# and display it (it has headers but no rows in the recorded output).
conversions_added_csv = os.path.join(os.getcwd(), "data", "cleaning", "update", "AMS_data", "Conversions_Added.csv")
conversions_added = pd.read_csv(conversions_added_csv)
conversions_added

Unnamed: 0,ConversionId,Multiplier,ConvertFromQty,ConvertFromUom,ConvertToQty,ConvertToUom


In [14]:
# Stack the updated conversions (conv_updatecov) on top of the previously added
# ones (conversions_added), renumbering the rows.
# Empty frames are left out of the concat: pandas emits a FutureWarning when an
# empty or all-NA frame takes part, because such frames will stop being ignored
# for result dtypes in a future release. Conversions_Added.csv is empty in the
# recorded run, which is what triggered the warning.
# Both inputs are expected to share the same columns, as in the recorded output.
non_empty_frames = [frame for frame in (conv_updatecov, conversions_added) if not frame.empty]
if non_empty_frames:
    combined_df = pd.concat(non_empty_frames, ignore_index=True)
else:
    # Nothing to combine: keep the expected columns but no rows.
    combined_df = conv_updatecov.iloc[0:0].copy()
combined_df

  combined_df = pd.concat([conv_updatecov, conversions_added], ignore_index=True)


Unnamed: 0,ConversionId,Multiplier,ConvertFromQty,ConvertFromUom,ConvertToQty,ConvertToUom
0,I-1971,0.003731,0.25,ea,16.75,g
1,I-15803,0.004000,1.00,can,250.00,ml
2,I-5505,0.001263,0.25,HEAD,49.50,g
3,I-11706,0.008547,1.00,ea,117.00,g
4,I-13308,0.024540,1.00,ea,40.75,g
...,...,...,...,...,...,...
158,I-6203,0.000529,1.00,1.89L,1890.00,ml
159,I-4128,0.002205,50.00,LBS,22679.50,g
160,I-4226,0.002205,50.00,LBS,22679.50,g
161,I-4234,0.002000,3.00,HEAD,1500.00,g


In [15]:
# Write the combined conversions (conv_updatecov + conversions_added) to
# Conversions_Added.csv, without the index column.
# NOTE(review): Conversions_Added.csv was read from data/cleaning/update/AMS_data/
# earlier, but this writes to data/cleaning/AMS_data/ (no "update" folder).
# Confirm which location is the intended master file.
# NOTE: this rebinds `path`, which previously held the repo folder.
path = os.path.join(os.getcwd(), "data", "cleaning", "AMS_data", "Conversions_Added.csv")
combined_df.to_csv(path, index = False, header = True)

***
## Import Update Info

In [16]:
# Read the preps whose unit had to be converted to a standard unit by hand;
# the StdQty / StdUom columns hold the manually entered values.
manual_prep_csv = os.path.join(os.getcwd(), "data", "cleaning", "update", "AMS_data", "Preps_UpdateUom.csv")
Manual_PrepU = pd.read_csv(manual_prep_csv)
# The original pack units in this file are non-standard ones (e.g. ea, PORT in the preview).
Manual_PrepU.head()

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup,StdQty,StdUom
0,P-14356,[PREP KAPPA MAKI,6.0,PORT,N,1511.82,g
1,P-14560,2023 Chicken Caesar wrap Prep,1.0,ea,Y,433.59,g
2,P-9003,2022 Gallery Burger prep,1.0,ea,N,501.82,g
3,P-17358,2023 Poutine Prep,1.0,ea,N,705.8,g
4,P-15006,2023 Power Punch Salad Prep,1.0,ea,N,416.73,g


In [17]:
# Spot check: show the row(s) of Manual_PrepU whose PrepId is P-15006.
Manual_PrepU.loc[Manual_PrepU["PrepId"] == "P-15006"]

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup,StdQty,StdUom
4,P-15006,2023 Power Punch Salad Prep,1.0,ea,N,416.73,g


In [18]:
# Load the new items that already have a CategoryID assigned.
# This run combines two files (one .xlsx, one .csv) into a single frame; update
# both paths when a new batch is processed. An earlier batch was read from
# data/mapping/new items added/New_Items_Added_10.csv.
# If a run has no new items, comment out the lines below and also remove
# New_Items_Added from the later concat with Items_Assigned.

New_Items_Added = pd.read_excel("data/mapping/new items added/AMS_data/New_items_2024/2024-07-11_New_Items_assigned.xlsx")
temp = pd.read_csv("data/mapping/new items added/AMS_data/New_items_2024/2024-07-16_New_Items_assigned.csv")
# Stack the two batches and renumber the rows.
New_Items_Added = pd.concat([New_Items_Added, temp], ignore_index=True)
# Spot check: list any new items assigned to category 63 (none in the recorded output).
New_Items_Added[New_Items_Added["CategoryID"] == 63]

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category


In [19]:
# Pull out the new items assigned to category 59 and save them, without the
# index column, to Manual_Adjust_Items.csv.
in_category_59 = New_Items_Added["CategoryID"] == 59
manual_adjust_items = New_Items_Added[in_category_59]
manual_adjust_items.to_csv("data/mapping/AMS_data/Manual_Adjust_Items.csv", index=False)

In [20]:
# Disabled cell, kept for runs where footprint factors are adjusted by hand.
# Un-comment it together with the manual-adjustment loop further down.
# The nitrogen and freshwater columns are created separately because that loop
# reads them as two distinct columns.
# # Import list of items that adjusted GHGe factor manually
# Manual_Factor = pd.read_csv(os.path.join(os.getcwd(), "data", "mapping","AMS_data", "Manual_Adjust_Items.csv"))
# Manual_Factor['Land Use (m^2)'] = 0
# Manual_Factor['Active Total Supply Chain Emissions (kg CO2 / kg food)'] = 0
# Manual_Factor['g N lost/kg product'] = 0
# Manual_Factor['Freshwater Withdrawals (L/FU)'] = 0
# Manual_Factor["Stress-Weighted Water Use (L/FU)"] = 0
# Manual_Factor.head()

### Update Correct Uom for Preps

In [21]:
# Overwrite StdQty / StdUom in Preps with the hand-entered values from
# Manual_PrepU, matching rows on PrepId. Manual rows are applied in file order,
# so a later row for the same PrepId wins.
manual_values = zip(Manual_PrepU['PrepId'], Manual_PrepU['StdQty'], Manual_PrepU['StdUom'])
for prep_id, std_qty, std_uom in manual_values:
    same_prep = Preps['PrepId'] == prep_id
    Preps.loc[same_prep, 'StdQty'] = std_qty
    Preps.loc[same_prep, 'StdUom'] = std_uom

In [22]:
# Keep only the first row for each PrepId.
Preps = Preps.drop_duplicates(subset=['PrepId'])

In [23]:
# Preview the prep list after the manual unit updates and de-duplication.
Preps.head()

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup,StdQty,StdUom
0,P-6068,Pro - Kansas City BBQ,1.25,L,Y,1250.0,ml
1,P-2824,2023 Alfredo Sauce Gal.,2250.0,ml,Y,2250.0,ml
2,P-1409,2023 Basmati Prep,2.0,Kg,Y,2000.0,g
3,P-17360,2023 Beef Gravy (prep),4.5,L,Y,4500.0,ml
4,P-16778,2023 Candied walnut,1.0,Kg,Y,1000.0,g


In [24]:
# (rows, columns) of the de-duplicated prep list.
Preps.shape

(65, 7)

In [25]:
# Save the updated prep list as Preps_List_Cleaned.csv in data/cleaning/AMS_data,
# without the index column.
path = os.path.join(os.getcwd(), "data", "cleaning", "AMS_data", "Preps_List_Cleaned.csv")
Preps.to_csv(path, index = False, header = True)

In [26]:
# Preview the existing assigned items list before the new items are appended.
Items_Assigned.head()

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category
0,I-1971,35,LIMES,0.25,ea,1.0,ea,N,
1,I-8228,55,Grey Goose 1.14L,1.0,fl oz,1.0,fl oz,N,
2,I-15803,31,Red Bull Watermelon,1.0,can,1.0,can,N,
3,I-5505,36,Lettuce - Romaine,0.25,HEAD,1.0,HEAD,N,
4,I-8667,58,ITEM GARLIC MAYO,2.0,fl oz,1.0,ml,N,


In [27]:
# Preview the new items, which already carry a CategoryID and Food Category.
New_Items_Added.head()

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category
0,I-4105,38,SPICE GARLIC GRANULATED,5.0,g,1.25,L,Y,root vegetables
1,I-4106,54,SPICE Pepper Black Grnd,10.0,g,1.25,L,Y,stimulants & spices misc.
2,I-4107,54,SPICE Nutmeg ground,2.0,g,2250.0,ml,Y,stimulants & spices misc.
3,I-4108,54,GRAVY MIX TRIO BROWN 1 STEP,490.0,g,4.5,L,Y,stimulants & spices misc.
4,I-4109,35,ORANGES 5lb,0.5,ea,1.0,ea,Y,citrus fruit


### Import List of New Items with Emission Factors Category ID Assigned

In [28]:
# Append the newly categorised items to the existing assigned list, renumber
# the rows, then discard rows that are exact duplicates across every column.
# When a run has no new items, leave New_Items_Added out of `frames`.
frames = [Items_Assigned, New_Items_Added]
stacked_items = pd.concat(frames, ignore_index=True)
Items_Assigned_Updated = stacked_items.drop_duplicates()

Items_Assigned_Updated.head()

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category
0,I-1971,35,LIMES,0.25,ea,1.0,ea,N,
1,I-8228,55,Grey Goose 1.14L,1.0,fl oz,1.0,fl oz,N,
2,I-15803,31,Red Bull Watermelon,1.0,can,1.0,can,N,
3,I-5505,36,Lettuce - Romaine,0.25,HEAD,1.0,HEAD,N,
4,I-8667,58,ITEM GARLIC MAYO,2.0,fl oz,1.0,ml,N,


In [29]:
# Spot check: show the row(s) for item I-4152 in the combined list.
Items_Assigned_Updated[Items_Assigned_Updated["ItemId"] == "I-4152"]

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category
479,I-4152,31,PEPPERS RED 5LB BAG,400.0,g,2.0,Kg,Y,other fruits


In [30]:
# Spot check: list any items assigned to category 63 (none in the recorded output).
Items_Assigned_Updated[Items_Assigned_Updated["CategoryID"] == 63]

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category


In [31]:
# (rows, columns) of the combined items list.
Items_Assigned_Updated.shape

(530, 9)

In [32]:
# Convert the CategoryID column to a numeric dtype so it can be joined against
# the numeric 'Category ID' key of the factor tables. pd.to_numeric raises if
# any value cannot be parsed as a number.
Items_Assigned_Updated['CategoryID'] = pd.to_numeric(Items_Assigned_Updated['CategoryID'])

In [33]:
# Dump the combined items list (with its index column) to yo.csv in the working directory.
# NOTE(review): looks like a leftover debugging export. Nothing in the cells shown
# reads yo.csv back, so consider removing this cell.
Items_Assigned_Updated.to_csv("yo.csv")

In [34]:
# Save the combined items list back to Items_List_Assigned.csv, without the index column.
# NOTE: this overwrites the same file that Items_Assigned was loaded from, so on
# the next run the new items are already part of the input; only rows identical
# in every column are removed by the earlier drop_duplicates().
path = os.path.join(os.getcwd(), "data", "mapping", "AMS_data", "Items_List_Assigned.csv")
Items_Assigned_Updated.to_csv(path, index = False, header = True)

## Mapping Items to Footprint Factors

In [35]:
# "Mapping" here means attaching footprint factors to each item via its category.
#
# Map GHG footprint factors.
# Left-join Items_Assigned_Updated (key 'CategoryID') with the GHG factor table
# (key 'Category ID'), so every item row is kept whether or not it finds a match.
ghge_columns = ['Category ID', 'Food Category', 'Active Total Supply Chain Emissions (kg CO2 / kg food)']
mapping = Items_Assigned_Updated.merge(
    ghge_factors.loc[:, ghge_columns],
    how='left',
    left_on='CategoryID',
    right_on='Category ID',
)
# Rows with no partner in the factor table come back with a NaN 'Category ID';
# those rows get a GHG factor of zero.
no_ghge_match = mapping['Category ID'].isna()
mapping.loc[no_ghge_match, 'Active Total Supply Chain Emissions (kg CO2 / kg food)'] = 0
# Drop the right-hand join key and the items' own food-category column (suffixed
# '_x' by the merge); the factor table's 'Food Category_y' is kept.
mapping = mapping.drop(columns=['Category ID', 'Food Category_x'])
mapping

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category_y,Active Total Supply Chain Emissions (kg CO2 / kg food)
0,I-1971,35,LIMES,0.25,ea,1.0,ea,N,citrus fruit,0.3942
1,I-8228,55,Grey Goose 1.14L,1.00,fl oz,1.0,fl oz,N,water & beverages,0.0000
2,I-15803,31,Red Bull Watermelon,1.00,can,1.0,can,N,other fruits,0.4306
3,I-5505,36,Lettuce - Romaine,0.25,HEAD,1.0,HEAD,N,cabbages and other brassicas (broccoli),0.6220
4,I-8667,58,ITEM GARLIC MAYO,2.00,fl oz,1.0,ml,N,sauces & paste,0.0000
...,...,...,...,...,...,...,...,...,...,...
525,I-4237,39,Extract Vanilla Pure 500ml,50.00,ml,55.0,ea,Y,onions and leeks,0.3015
526,I-4196,58,ITEM SAUCE BURGER,2.00,oz (fl),,,Y,sauces & paste,0.0000
527,I-4197,58,DRESSING Ranch BtrMilk,2.00,oz (fl),,,Y,sauces & paste,0.0000
528,I-4200,38,Fries Sweet Potato,400.00,g,,,Y,root vegetables,0.3062


In [36]:
# Map nitrogen footprint factors.
# Left-join on the category key so every item row is kept.
nitro_columns = ['Category ID', 'Food Category', 'g N lost/kg product']
mapping = pd.merge(mapping, nitro_factors.loc[:, nitro_columns],
                   how='left',
                   left_on='CategoryID',
                   right_on='Category ID')

# Zero-fill rows that found no partner in the nitrogen table. The test uses the
# right-hand key 'Category ID' (NaN when the join found nothing), the same rule
# the GHG and water mapping cells apply. The previous test on the left key
# 'CategoryID' only caught items with no category at all, and left NaN for items
# whose category is simply absent from the nitrogen table.
no_nitro_match = mapping['Category ID'].isna()
mapping.loc[no_nitro_match, 'g N lost/kg product'] = 0

# Drop the join key and the duplicate food-category label brought in by the merge.
mapping = mapping.drop(columns=['Category ID', 'Food Category'])
mapping

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category_y,Active Total Supply Chain Emissions (kg CO2 / kg food),g N lost/kg product
0,I-1971,35,LIMES,0.25,ea,1.0,ea,N,citrus fruit,0.3942,2.70
1,I-8228,55,Grey Goose 1.14L,1.00,fl oz,1.0,fl oz,N,water & beverages,0.0000,0.00
2,I-15803,31,Red Bull Watermelon,1.00,can,1.0,can,N,other fruits,0.4306,2.70
3,I-5505,36,Lettuce - Romaine,0.25,HEAD,1.0,HEAD,N,cabbages and other brassicas (broccoli),0.6220,7.90
4,I-8667,58,ITEM GARLIC MAYO,2.00,fl oz,1.0,ml,N,sauces & paste,0.0000,6.75
...,...,...,...,...,...,...,...,...,...,...,...
525,I-4237,39,Extract Vanilla Pure 500ml,50.00,ml,55.0,ea,Y,onions and leeks,0.3015,7.90
526,I-4196,58,ITEM SAUCE BURGER,2.00,oz (fl),,,Y,sauces & paste,0.0000,6.75
527,I-4197,58,DRESSING Ranch BtrMilk,2.00,oz (fl),,,Y,sauces & paste,0.0000,6.75
528,I-4200,38,Fries Sweet Potato,400.00,g,,,Y,root vegetables,0.3062,7.90


In [37]:
# Map land footprint factors.
# Left-join on the category key so every item row is kept.
land_columns = ['Category ID', 'Food Category', 'Land Use (m^2)']
mapping = pd.merge(mapping, land_factors.loc[:, land_columns],
                   how='left',
                   left_on='CategoryID',
                   right_on='Category ID')

# Zero-fill rows that found no partner in the land table. The test uses the
# right-hand key 'Category ID' (NaN when the join found nothing), the same rule
# the GHG and water mapping cells apply. The previous test on the left key
# 'CategoryID' only caught items with no category at all, and left NaN for items
# whose category is simply absent from the land table.
no_land_match = mapping['Category ID'].isna()
mapping.loc[no_land_match, 'Land Use (m^2)'] = 0

# Drop the join key and the duplicate food-category label brought in by the merge.
mapping = mapping.drop(columns=['Category ID', 'Food Category'])
mapping

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category_y,Active Total Supply Chain Emissions (kg CO2 / kg food),g N lost/kg product,Land Use (m^2)
0,I-1971,35,LIMES,0.25,ea,1.0,ea,N,citrus fruit,0.3942,2.70,0.00042
1,I-8228,55,Grey Goose 1.14L,1.00,fl oz,1.0,fl oz,N,water & beverages,0.0000,0.00,0.00000
2,I-15803,31,Red Bull Watermelon,1.00,can,1.0,can,N,other fruits,0.4306,2.70,0.00071
3,I-5505,36,Lettuce - Romaine,0.25,HEAD,1.0,HEAD,N,cabbages and other brassicas (broccoli),0.6220,7.90,0.00230
4,I-8667,58,ITEM GARLIC MAYO,2.00,fl oz,1.0,ml,N,sauces & paste,0.0000,6.75,0.00000
...,...,...,...,...,...,...,...,...,...,...,...,...
525,I-4237,39,Extract Vanilla Pure 500ml,50.00,ml,55.0,ea,Y,onions and leeks,0.3015,7.90,0.00053
526,I-4196,58,ITEM SAUCE BURGER,2.00,oz (fl),,,Y,sauces & paste,0.0000,6.75,0.00000
527,I-4197,58,DRESSING Ranch BtrMilk,2.00,oz (fl),,,Y,sauces & paste,0.0000,6.75,0.00000
528,I-4200,38,Fries Sweet Potato,400.00,g,,,Y,root vegetables,0.3062,7.90,0.00032


In [38]:
# Map water footprint factors (freshwater withdrawals and stress-weighted use).
# After this cell, `mapping` holds the GHG, nitrogen, land and water factors
# for every item, looked up through the item's category.
water_columns = ['Category ID', 'Food Category', 'Freshwater Withdrawals (L/FU)', 'Stress-Weighted Water Use (L/FU)']
mapping = mapping.merge(
    water_factors.loc[:, water_columns],
    how='left',
    left_on='CategoryID',
    right_on='Category ID',
)

# Rows with no partner in the water table (NaN 'Category ID') get zero for both
# water columns.
no_water_match = mapping['Category ID'].isna()
mapping.loc[no_water_match, 'Freshwater Withdrawals (L/FU)'] = 0
mapping.loc[no_water_match, 'Stress-Weighted Water Use (L/FU)'] = 0

# Drop the join key and duplicate label, then keep the first row per ItemId.
mapping = mapping.drop(columns=['Category ID', 'Food Category'])
mapping = mapping.drop_duplicates(subset=["ItemId"])
mapping

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category_y,Active Total Supply Chain Emissions (kg CO2 / kg food),g N lost/kg product,Land Use (m^2),Freshwater Withdrawals (L/FU),Stress-Weighted Water Use (L/FU)
0,I-1971,35,LIMES,0.25,ea,1.0,ea,N,citrus fruit,0.3942,2.70,0.00042,37.400,1345.500
1,I-8228,55,Grey Goose 1.14L,1.00,fl oz,1.0,fl oz,N,water & beverages,0.0000,0.00,0.00000,1.000,1.000
2,I-15803,31,Red Bull Watermelon,1.00,can,1.0,can,N,other fruits,0.4306,2.70,0.00071,3.500,4.700
3,I-5505,36,Lettuce - Romaine,0.25,HEAD,1.0,HEAD,N,cabbages and other brassicas (broccoli),0.6220,7.90,0.00230,54.500,2483.400
4,I-8667,58,ITEM GARLIC MAYO,2.00,fl oz,1.0,ml,N,sauces & paste,0.0000,6.75,0.00000,20.225,1134.925
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
525,I-4237,39,Extract Vanilla Pure 500ml,50.00,ml,55.0,ea,Y,onions and leeks,0.3015,7.90,0.00053,1.900,57.000
526,I-4196,58,ITEM SAUCE BURGER,2.00,oz (fl),,,Y,sauces & paste,0.0000,6.75,0.00000,20.225,1134.925
527,I-4197,58,DRESSING Ranch BtrMilk,2.00,oz (fl),,,Y,sauces & paste,0.0000,6.75,0.00000,20.225,1134.925
528,I-4200,38,Fries Sweet Potato,400.00,g,,,Y,root vegetables,0.3062,7.90,0.00032,9.900,37.900


In [39]:
# Sanity check: list items that still have no nitrogen factor (expected to be empty).
mapping[mapping["g N lost/kg product"].isnull()]

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category_y,Active Total Supply Chain Emissions (kg CO2 / kg food),g N lost/kg product,Land Use (m^2),Freshwater Withdrawals (L/FU),Stress-Weighted Water Use (L/FU)


In [40]:
# Spot check: show the mapped factors for item I-4140.
mapping[mapping["ItemId"] == "I-4140"]

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category_y,Active Total Supply Chain Emissions (kg CO2 / kg food),g N lost/kg product,Land Use (m^2),Freshwater Withdrawals (L/FU),Stress-Weighted Water Use (L/FU)
474,I-4140,40,CILANTRO 3ct,1.0,bunch,400.0,ml,Y,other vegetables,0.5029,7.9,0.00119,81.3,2939.5


In [41]:
# Sanity check: number of items without a CategoryID (0 in the recorded output).
mapping["CategoryID"].isnull().sum()

0

### Manually Adjust Footprint Factors for Specific Items

In [42]:
# Disabled cell: it needs Manual_Factor, whose loading cell is also commented out.
# Un-comment both together when factors have to be overridden by hand.
# # For Manual_Factor: dataframe with items that adjusted GHGe factor manually.
# # It takes the id for Manual_Factor for the index being iterated and if it is equal to item id of mapping dataframe then it sets
# # the values of the columns to the manually adjusted values
# # Note: the values for the columns in mapping DataFrame is adjusted and not Manual_Factor DataFrame
# for index, row in Manual_Factor.iterrows():
#     itemId = Manual_Factor.loc[index, 'ItemId']
#     ghge = Manual_Factor.loc[index, 'Active Total Supply Chain Emissions (kg CO2 / kg food)']
#     nitro = Manual_Factor.loc[index, 'g N lost/kg product']
#     water = Manual_Factor.loc[index, 'Freshwater Withdrawals (L/FU)']
#     land = Manual_Factor.loc[index, 'Land Use (m^2)']
#     str_water = Manual_Factor.loc[index, 'Stress-Weighted Water Use (L/FU)']
#     mapping.loc[mapping['ItemId'] == itemId, 'Active Total Supply Chain Emissions (kg CO2 / kg food)'] = ghge
#     mapping.loc[mapping['ItemId'] == itemId, 'g N lost/kg product'] = nitro
#     mapping.loc[mapping['ItemId'] == itemId, 'Freshwater Withdrawals (L/FU)'] = water
#     mapping.loc[mapping['ItemId'] == itemId, 'Stress-Weighted Water Use (L/FU)'] = str_water
#     mapping.loc[mapping['ItemId'] == itemId, 'Land Use (m^2)'] = land

In [43]:
# Keep the first row per ItemId, then show the column dtypes of the final mapping.
mapping = mapping.drop_duplicates(subset=['ItemId'])
mapping.dtypes

ItemId                                                     object
CategoryID                                                  int64
Description                                                object
CaseQty                                                   float64
CaseUOM                                                    object
PakQty                                                    float64
PakUOM                                                     object
InventoryGroup                                             object
Food Category_y                                            object
Active Total Supply Chain Emissions (kg CO2 / kg food)    float64
g N lost/kg product                                       float64
Land Use (m^2)                                            float64
Freshwater Withdrawals (L/FU)                             float64
Stress-Weighted Water Use (L/FU)                          float64
dtype: object

In [44]:
# (rows, columns) of the final mapping table.
mapping.shape

(530, 14)

In [45]:
# Display the final mapping table (pandas truncates the middle rows).
mapping

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category_y,Active Total Supply Chain Emissions (kg CO2 / kg food),g N lost/kg product,Land Use (m^2),Freshwater Withdrawals (L/FU),Stress-Weighted Water Use (L/FU)
0,I-1971,35,LIMES,0.25,ea,1.0,ea,N,citrus fruit,0.3942,2.70,0.00042,37.400,1345.500
1,I-8228,55,Grey Goose 1.14L,1.00,fl oz,1.0,fl oz,N,water & beverages,0.0000,0.00,0.00000,1.000,1.000
2,I-15803,31,Red Bull Watermelon,1.00,can,1.0,can,N,other fruits,0.4306,2.70,0.00071,3.500,4.700
3,I-5505,36,Lettuce - Romaine,0.25,HEAD,1.0,HEAD,N,cabbages and other brassicas (broccoli),0.6220,7.90,0.00230,54.500,2483.400
4,I-8667,58,ITEM GARLIC MAYO,2.00,fl oz,1.0,ml,N,sauces & paste,0.0000,6.75,0.00000,20.225,1134.925
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
525,I-4237,39,Extract Vanilla Pure 500ml,50.00,ml,55.0,ea,Y,onions and leeks,0.3015,7.90,0.00053,1.900,57.000
526,I-4196,58,ITEM SAUCE BURGER,2.00,oz (fl),,,Y,sauces & paste,0.0000,6.75,0.00000,20.225,1134.925
527,I-4197,58,DRESSING Ranch BtrMilk,2.00,oz (fl),,,Y,sauces & paste,0.0000,6.75,0.00000,20.225,1134.925
528,I-4200,38,Fries Sweet Potato,400.00,g,,,Y,root vegetables,0.3062,7.90,0.00032,9.900,37.900


In [46]:
# Spot check: show the mapped factors for item I-1874.
mapping[mapping["ItemId"] == "I-1874"]

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category_y,Active Total Supply Chain Emissions (kg CO2 / kg food),g N lost/kg product,Land Use (m^2),Freshwater Withdrawals (L/FU),Stress-Weighted Water Use (L/FU)
16,I-1874,38,GARLIC WHOLE PEELED,200.0,g,30.0,oz,N,root vegetables,0.3062,7.9,0.00032,9.9,37.9


In [47]:
# Dump the mapping table (with its index column) to mapping.csv in the working directory.
# NOTE(review): the final cell writes the same table to data/mapping/AMS_data/Mapping.csv
# without the index. This extra copy looks like a leftover debugging export; confirm
# before removing it.
mapping.to_csv("mapping.csv")

In [48]:
# Read the ingredient list (IngredientId, Qty, Uom and the recipe or prep it
# belongs to) and display it.
ingredients_csv = "data/preprocessed/AMS_data/Ingredients_List.csv"
ingredients = pd.read_csv(ingredients_csv)
ingredients

Unnamed: 0,IngredientId,Qty,Uom,Recipe
0,P-18275,1.0,ea,R-2654
1,P-5506,1.0,ea,R-7227
2,P-17013,2.0,Tbsp,R-7227
3,P-14560,1.0,ea,R-8990
4,P-18295,1.0,ea,R-5554
...,...,...,...,...
800,I-3005,60.0,g,P-15368
801,I-3804,18.0,g,P-15368
802,I-6865,500.0,g,P-15368
803,I-9117,1.0,L,P-15368


In [49]:
# Coverage check: collect every ingredient id used in a recipe that has no row
# in `mapping`. Ids starting with "P-" are skipped by this check.
map_list = mapping["ItemId"].unique()
absent_list = [
    ingredient_id
    for ingredient_id in ingredients["IngredientId"].unique()
    if not (ingredient_id in map_list or ingredient_id.startswith("P-"))
]

# An empty list means every non-"P-" ingredient has footprint factors mapped.
print(absent_list)


[]


In [50]:
# Save the final mapping table as Mapping.csv in data/mapping/AMS_data,
# without the index column.
path = os.path.join(os.getcwd(), "data", "mapping", "AMS_data", "Mapping.csv")
mapping.to_csv(path, index = False, header = True)