# Climate-Friendly Food Systems (CFFS) Labelling Project

### The University of British Columbia

***

## Part III: Update Information and Mapping

## Set up and Import Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import  os
import xml.etree.ElementTree as et
from xml.etree.ElementTree import parse
from datetime import datetime

In [2]:
# RUN ONLY ONCE
# os.chdir is used to change the current directory to the specified path
os.chdir("../") # Sets path to the repo folder as it is one level above where this file exists!
path = os.getcwd()
print(path)

/Users/vivaanwadhwa/Documents/GitHub/CFFS_sharon_2024


***
## Import Preprocessed Datasets

In [3]:
# Creates a DataFrame from reading a csv file 
Preps = pd.read_csv(os.path.join(os.getcwd(), "data", "cleaning", "AMS_data", "Preps_Unit_Cleaned.csv"))
Preps.head()

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup,StdQty,StdUom
0,P-2679,Pro - Tahini Sauce,1.0,L,Y,1000.0,ml
1,P-15427,Pro - Vegan Chipotle Mayo,16.5,L,Y,16500.0,ml
2,P-7912,BCPrep - Baked Eggs for Wraps,4.3,Kg,Y,4300.0,g
3,P-14403,Prep Cream Cheese - Garlic,11000.0,g,Y,11000.0,g


In [4]:
# Creates a DataFrame from reading a csv file 
ghge_factors = pd.read_csv(os.path.join(os.getcwd(), "data", "external", "ghge_factors.csv"))
ghge_factors.head()

Unnamed: 0,Category ID,Food Category,Active Total Supply Chain Emissions (kg CO2 / kg food)
0,1,beef & buffalo meat,41.3463
1,2,lamb/mutton & goat meat,41.6211
2,3,pork (pig meat),9.8315
3,4,"poultry (chicken, turkey)",4.3996
4,5,butter,11.4316


In [5]:
# Creates a DataFrame from reading a csv file 
nitro_factors = pd.read_csv(os.path.join(os.getcwd(), "data", "external", "nitrogen_factors.csv"))
nitro_factors.head()

Unnamed: 0,Category ID,Food Category,g N lost/kg product
0,1,beef & buffalo meat,329.5
1,2,lamb/mutton & goat meat,231.15
2,3,pork (pig meat),132.8
3,4,"poultry (chicken, turkey)",116.8
4,5,butter,100.35


In [6]:
# Creates a DataFrame from reading a csv file 
water_factors = pd.read_csv(os.path.join(os.getcwd(), "data", "external", "water_factors.csv"))
water_factors.head()

Unnamed: 0,Category ID,Food Category,Freshwater Withdrawals (L/FU),Stress-Weighted Water Use (L/FU)
0,1,beef & buffalo meat,1677.2,61309.0
1,2,lamb/mutton & goat meat,461.2,258.9
2,3,pork (pig meat),1810.3,54242.7
3,4,"poultry (chicken, turkey)",370.3,333.5
4,5,butter,1010.176,50055.168


In [7]:
# Creates a DataFrame from reading a csv file 
land_factors = pd.read_csv(os.path.join(os.getcwd(), "data", "external", "land_factors.csv"))
land_factors.rename(columns={'km^2 land use/kg product': 'Land Use (m^2)'}, inplace=True)
land_factors['Land Use (m^2)'] *= 1000
land_factors.head()

Unnamed: 0,Category ID,Food Category,Land Use (m^2)
0,1,beef & buffalo meat,0.12645
1,2,lamb/mutton & goat meat,0.1432
2,3,pork (pig meat),0.02102
3,4,"poultry (chicken, turkey)",0.01151
4,5,butter,0.01395


In [8]:
# Load current Items List with assigned Emission Factors Category ID
Items_Assigned = pd.read_csv(os.path.join(os.getcwd(), "data", "mapping", "AMS_data", "Items_List_Assigned.csv"))
Items_Assigned.head()

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category
0,I-1971,35,LIMES,0.25,ea,1.0,ea,N,
1,I-8228,55,Grey Goose 1.14L,1.0,fl oz,1.0,fl oz,N,
2,I-15803,31,Red Bull Watermelon,1.0,can,1.0,can,N,
3,I-5505,36,Lettuce - Romaine,0.25,HEAD,1.0,HEAD,N,
4,I-8667,58,ITEM GARLIC MAYO,2.0,fl oz,1.0,ml,N,


In [9]:
# new_items = pd.read_csv("data/mapping/new items added/New_Items_Added_11.csv")
# new_items = pd.read_csv("data/mapping/new items added/New_Items_2023/New_Items_Added_2023_08_01.csv")
# CHECK FOR CHANGES REQUIRES

# # COMMENTED OUT LINES BELOW, commented it out this time because no new items were added

new_items = pd.read_csv("data/mapping/AMS_data/new items/2024-08-01_New_Items.csv")
new_items.head()

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup
0,I-16857,,ITEM SAUCE BURGER,2.0,oz (fl),,,Y
1,I-16858,,DRESSING Ranch BtrMilk,2.0,oz (fl),,,Y
2,I-16859,,Lemon Pepper Seasoning,10.0,g,,,Y
3,I-16861,,ITEM G21- MANGO HABANERO SAUCE,2.0,oz (fl),,,Y
4,I-16862,,Fries Sweet Potato,400.0,g,,,Y


In [10]:
item_nonstd = pd.read_csv("data/cleaning/AMS_data/Items_Nonstd.csv")
item_nonstd

Unnamed: 0,IngredientId,Qty,Uom,Recipe,Description
0,I-4109,1.0,bunch,P-2679,CILANTRO 3ct
1,I-4114,10.0,ea,P-2424,"TORTILLA 10"" FLOUR PRESSED TF"
2,I-2992,2.0,slice,P-10674,Bacon Pre-Ckd 30-34 ct
3,I-4122,2.0,2 KG,P-7912,EGG LIQ WHL CAGE FREE FRSH
4,I-4126,2.0,slice,P-1488,MULTIGRAIN CLUBHOUSE
...,...,...,...,...,...
105,I-4323,1.0,ea,R-2588,VEGAN COOKIE
106,I-4324,1.0,ea,R-8062,VEGAN DATE BAR
107,I-4325,1.0,ea,R-7939,CHICK SALAD
108,I-4326,1.0,ea,R-8651,EGG SALAD


In [11]:
preps_nonstd = pd.read_csv("data/cleaning/AMS_data/Preps_NonstdUom.csv")
preps_nonstd

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup,StdQty,StdUom
0,P-2824,BC - Tomato Focacia,14.0,ea,Y,,
1,P-5012,Catr- Roasted Chicken Breast,1.0,ea,Y,,
2,P-9935,Baking - Focacia Baguette,14.0,ea,Y,,
3,P-2424,BCPrep - Asian Sesame Wrap,10.0,ea,Y,,
4,P-10674,BCPrep - BACON WRAP,1.0,ea,Y,,
5,P-1520,BCPrep - Chicken Quinoa Wrap,10.0,ea,Y,,
6,P-1488,BCPrep - Chicken Salad,1.0,ea,Y,,
7,P-2535,BCPrep - Egg Salad,1.0,ea,Y,,
8,P-4582,BCPrep - Ham & Swiss Croissant,1.0,ea,Y,,
9,P-4811,BCPrep - Mediterranean Wrap,10.0,ea,Y,,


In [12]:
conv_updatecov = pd.read_csv("data/cleaning/update/AMS_data/Conv_UpdateConv.csv")
conv_updatecov

Unnamed: 0,ConversionId,Multiplier,ConvertFromQty,ConvertFromUom,ConvertToQty,ConvertToUom
0,I-1971,0.003731,0.25,ea,16.75,g
1,I-15803,0.004000,1.00,can,250.00,ml
2,I-5505,0.001263,0.25,HEAD,49.50,g
3,I-11706,0.008547,1.00,ea,117.00,g
4,I-13308,0.024540,1.00,ea,40.75,g
...,...,...,...,...,...,...
190,I-3810,0.010000,1.00,ea,100.00,g
191,I-3815,0.010000,0.25,ea,25.00,g
192,I-3816,0.010000,0.25,ea,25.00,g
193,I-3817,0.010000,0.25,ea,25.00,g


# Update Conversion_Added.csv

In [13]:
conversions_added = pd.read_csv(os.path.join(os.getcwd(), "data", "cleaning", "update", "AMS_data", "Conversions_Added.csv"))
conversions_added

Unnamed: 0,ConversionId,Multiplier,ConvertFromQty,ConvertFromUom,ConvertToQty,ConvertToUom


In [14]:
# Concatenate the DataFrames
combined_df = pd.concat([conv_updatecov, conversions_added], ignore_index=True)
combined_df

  combined_df = pd.concat([conv_updatecov, conversions_added], ignore_index=True)


Unnamed: 0,ConversionId,Multiplier,ConvertFromQty,ConvertFromUom,ConvertToQty,ConvertToUom
0,I-1971,0.003731,0.25,ea,16.75,g
1,I-15803,0.004000,1.00,can,250.00,ml
2,I-5505,0.001263,0.25,HEAD,49.50,g
3,I-11706,0.008547,1.00,ea,117.00,g
4,I-13308,0.024540,1.00,ea,40.75,g
...,...,...,...,...,...,...
190,I-3810,0.010000,1.00,ea,100.00,g
191,I-3815,0.010000,0.25,ea,25.00,g
192,I-3816,0.010000,0.25,ea,25.00,g
193,I-3817,0.010000,0.25,ea,25.00,g


In [15]:
# Add the conv_updatecov file to the masterfile, Conversions_Added
path = os.path.join(os.getcwd(), "data", "cleaning", "AMS_data", "Conversions_Added.csv")
combined_df.to_csv(path, index = False, header = True)

***
## Import Update Info

In [16]:
# Import list of prep that need convert uom to standard uom manually
Manual_PrepU = pd.read_csv(os.path.join(os.getcwd(), "data", "cleaning", "update", "AMS_data", "Preps_UpdateUom.csv"))
# Here we can see that UOM examples are: each, ea, slice
Manual_PrepU.head()

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup,StdQty,StdUom
0,P-14356,[PREP KAPPA MAKI,6.0,PORT,N,1511.82,g
1,P-14560,2023 Chicken Caesar wrap Prep,1.0,ea,Y,433.59,g
2,P-9003,2022 Gallery Burger prep,1.0,ea,N,501.82,g
3,P-17358,2023 Poutine Prep,1.0,ea,N,705.8,g
4,P-15006,2023 Power Punch Salad Prep,1.0,ea,N,416.73,g


In [17]:
# The row of the DatFrame that contains the PrepId == P-15006
Manual_PrepU.loc[Manual_PrepU["PrepId"] == "P-15006"]

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup,StdQty,StdUom
4,P-15006,2023 Power Punch Salad Prep,1.0,ea,N,416.73,g


In [18]:
# Select the file path for new items list with category id
# New_Items_Added = pd.read_csv(os.path.join(os.getcwd(), "data", "mapping", "new items added", "New_Items_Added_10.csv"))
# New_Items_Added.tail(15)

# CHECK FOR CHANGES REQUIRES


# # COMMENTED OUT LINES BELOW, commented it out this time because no new items were added

New_Items_Added = pd.read_csv("data/mapping/new items added/AMS_data/New_items_2024/2024-09-10_New_Items_assigned.csv")
New_Items_Added

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup
0,I-4104,4,CHICKEN BRST 6Z B/S 19% IQF,6.0,oz (wt),1.0,ea,Y
1,I-4110,58,ITEM MAYO VEGAN,16.0,L,16.5,L,Y
2,I-4111,24,MIX - FOCCACHIA,12.0,g,14.0,ea,Y
3,I-4112,37,TOMATO - SUNDRD JULIENNE,135.0,g,14.0,ea,Y
4,I-4113,24,YEAST BAKERS FRSH,130.0,g,14.0,ea,Y
5,I-4114,24,"TORTILLA 10"" FLOUR PRESSED TF",10.0,ea,10.0,ea,Y
6,I-4115,16,ITEM SLICED RED ONION,50.0,g,10.0,ea,Y
7,I-4116,16,ITEM SHAVED HERL CARROTS,150.0,g,10.0,ea,Y
8,I-4117,20,ITEM SESAME TOFU,2.0,Kg,10.0,ea,Y
9,I-4120,58,ITEM AIOLI CHIPOLTE,10.0,ml,1.0,ea,Y


In [19]:
manual_adjust_items = New_Items_Added[New_Items_Added["CategoryID"] == 59]
display(manual_adjust_items)
manual_adjust_items.to_csv("data/mapping/AMS_data/Manual_Adjust_Items.csv", index=False)

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup


In [20]:
# # Import list of items that adjusted GHGe factor manually
Manual_Factor = pd.read_csv(os.path.join(os.getcwd(), "data", "mapping","AMS_data", "Manual_Adjust_Items.csv"))
Manual_Factor['Land Use (m^2)'] = 0
Manual_Factor['Active Total Supply Chain Emissions (kg CO2 / kg food)'] = 0
Manual_Factor['g N lost/kg product,Freshwater Withdrawals (L/FU)'] = 0
Manual_Factor["Stress-Weighted Water Use (L/FU)"] = 0
Manual_Factor.head()

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Land Use (m^2),Active Total Supply Chain Emissions (kg CO2 / kg food),"g N lost/kg product,Freshwater Withdrawals (L/FU)",Stress-Weighted Water Use (L/FU)


### Update Correct Uom for Preps

In [21]:
# Update prep list with manually adjusted uom
for index, row in Manual_PrepU.iterrows():
    PrepId = Manual_PrepU.loc[index, 'PrepId']
    qty = Manual_PrepU.loc[index, 'StdQty']
    uom = Manual_PrepU.loc[index, 'StdUom']
    Preps.loc[Preps['PrepId'] == PrepId, 'StdQty'] = qty
    Preps.loc[Preps['PrepId'] == PrepId, 'StdUom'] = uom

In [22]:
Preps.drop_duplicates(subset=['PrepId'], inplace=True,)

In [23]:
Preps.head()

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup,StdQty,StdUom
0,P-2679,Pro - Tahini Sauce,1.0,L,Y,1000.0,ml
1,P-15427,Pro - Vegan Chipotle Mayo,16.5,L,Y,16500.0,ml
2,P-7912,BCPrep - Baked Eggs for Wraps,4.3,Kg,Y,4300.0,g
3,P-14403,Prep Cream Cheese - Garlic,11000.0,g,Y,450.0,g


In [24]:
Preps.shape

(4, 7)

In [25]:
path = os.path.join(os.getcwd(), "data", "cleaning", "AMS_data", "Preps_List_Cleaned.csv")
Preps.to_csv(path, index = False, header = True)

In [26]:
Items_Assigned.head()

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category
0,I-1971,35,LIMES,0.25,ea,1.0,ea,N,
1,I-8228,55,Grey Goose 1.14L,1.0,fl oz,1.0,fl oz,N,
2,I-15803,31,Red Bull Watermelon,1.0,can,1.0,can,N,
3,I-5505,36,Lettuce - Romaine,0.25,HEAD,1.0,HEAD,N,
4,I-8667,58,ITEM GARLIC MAYO,2.0,fl oz,1.0,ml,N,


In [27]:
New_Items_Added.head()

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup
0,I-4104,4,CHICKEN BRST 6Z B/S 19% IQF,6.0,oz (wt),1.0,ea,Y
1,I-4110,58,ITEM MAYO VEGAN,16.0,L,16.5,L,Y
2,I-4111,24,MIX - FOCCACHIA,12.0,g,14.0,ea,Y
3,I-4112,37,TOMATO - SUNDRD JULIENNE,135.0,g,14.0,ea,Y
4,I-4113,24,YEAST BAKERS FRSH,130.0,g,14.0,ea,Y


### Import List of New Items with Emission Factors Category ID Assigned

In [28]:
# REMOVED New_Items_Added from frame right now because no new items added. Change this when adding new items (Feb 1 2024)
frames = [Items_Assigned, New_Items_Added]
Items_Assigned_Updated = pd.concat(frames).reset_index(drop=True, inplace=False).drop_duplicates()
# Items_Assigned_Updated.head()

Items_Assigned_Updated.head()

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category
0,I-1971,35,LIMES,0.25,ea,1.0,ea,N,
1,I-8228,55,Grey Goose 1.14L,1.0,fl oz,1.0,fl oz,N,
2,I-15803,31,Red Bull Watermelon,1.0,can,1.0,can,N,
3,I-5505,36,Lettuce - Romaine,0.25,HEAD,1.0,HEAD,N,
4,I-8667,58,ITEM GARLIC MAYO,2.0,fl oz,1.0,ml,N,


In [29]:
Items_Assigned_Updated[Items_Assigned_Updated["ItemId"] == "I-4152"]

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category
479,I-4152,31,PEPPERS RED 5LB BAG,400.0,g,2.0,Kg,Y,other fruits


In [30]:
Items_Assigned_Updated[Items_Assigned_Updated["CategoryID"] == 63]

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category


In [31]:
Items_Assigned_Updated.shape

(568, 9)

In [32]:
# Double brackets used to specify the column as a dataframe and not a series
# converting the 'CategoryID' column in the Items_Assigned_Updated dataframe to numeric data type using the pd.to_numeric()
# use the apply column to apply the coversion to the entire column
Items_Assigned_Updated[['CategoryID']] = Items_Assigned_Updated[['CategoryID']].apply(pd.to_numeric)

In [33]:
Items_Assigned_Updated.to_csv("yo.csv")

In [34]:
path = os.path.join(os.getcwd(), "data", "mapping", "AMS_data", "Items_List_Assigned.csv")
Items_Assigned_Updated.to_csv(path, index = False, header = True)

## Mapping Items to Footprint Factors

In [35]:
# DEFINITION OF MAPPING: assigning certain attributes to data points based on criteria or predefined rules.

# Map GHG footprint factors
# merges two data frames Items_Assigned_Updated and ghge_factors on the columns CategoryID and Category ID, respectively. The 
# resulting data frame is stored in mapping.

# how='left' specifies that a left join is done between Items_Assigned_Updated and ghge_factors.
# left_on='CategoryID' specifies that the join condition for Items_Assigned_Updated should be based on the 'CategoryID' column.
# right_on='Category ID' specifies that the join condition for ghge_factors should be based on the 'Category ID' column.

mapping = pd.merge(Items_Assigned_Updated, ghge_factors.loc[:,['Category ID','Food Category','Active Total Supply Chain Emissions (kg CO2 / kg food)']], 
                  how = 'left',
                  left_on = 'CategoryID', 
                  right_on = 'Category ID')
# if row in mapping has CategoryId == NaN then it assigns the value of the column Active Total Supply Chain Emissions (kg CO2 / kg food)
# to zero
for index in mapping.index:
    if np.isnan(mapping.loc[index,'Category ID']):
        mapping.loc[index,'Active Total Supply Chain Emissions (kg CO2 / kg food)'] = 0
# drops the columns Category ID and Food Category
mapping = mapping.drop(columns=['Category ID', 'Food Category_x'])
mapping

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category_y,Active Total Supply Chain Emissions (kg CO2 / kg food)
0,I-1971,35,LIMES,0.25,ea,1.0,ea,N,citrus fruit,0.3942
1,I-8228,55,Grey Goose 1.14L,1.00,fl oz,1.0,fl oz,N,water & beverages,0.0000
2,I-15803,31,Red Bull Watermelon,1.00,can,1.0,can,N,other fruits,0.4306
3,I-5505,36,Lettuce - Romaine,0.25,HEAD,1.0,HEAD,N,cabbages and other brassicas (broccoli),0.6220
4,I-8667,58,ITEM GARLIC MAYO,2.00,fl oz,1.0,ml,N,sauces & paste,0.0000
...,...,...,...,...,...,...,...,...,...,...
563,I-3816,24,Bagel - Rosemary,0.25,ea,,,Y,"wheat/rye (bread, pasta, baked goods)",1.5225
564,I-3817,24,Bagel - Jalapeno Cheddar,0.25,ea,,,Y,"wheat/rye (bread, pasta, baked goods)",1.5225
565,I-3818,27,ALMOND MILK Barista,14.00,oz (fl),,,Y,almond milk,0.7021
566,I-3819,28,Oat Milk Barista Blend,14.00,oz (fl),,,Y,oat milk,0.9943


In [36]:
# Map nitrogen footprint factors
mapping = pd.merge(mapping, nitro_factors.loc[:,['Category ID','Food Category','g N lost/kg product']], 
                  how = 'left',
                  left_on = 'CategoryID', 
                  right_on = 'Category ID')

for index in mapping.index:
    if np.isnan(mapping.loc[index,'CategoryID']):
        mapping.loc[index,'g N lost/kg product'] = 0

mapping = mapping.drop(columns=['Category ID', 'Food Category'])
mapping

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category_y,Active Total Supply Chain Emissions (kg CO2 / kg food),g N lost/kg product
0,I-1971,35,LIMES,0.25,ea,1.0,ea,N,citrus fruit,0.3942,2.70
1,I-8228,55,Grey Goose 1.14L,1.00,fl oz,1.0,fl oz,N,water & beverages,0.0000,0.00
2,I-15803,31,Red Bull Watermelon,1.00,can,1.0,can,N,other fruits,0.4306,2.70
3,I-5505,36,Lettuce - Romaine,0.25,HEAD,1.0,HEAD,N,cabbages and other brassicas (broccoli),0.6220,7.90
4,I-8667,58,ITEM GARLIC MAYO,2.00,fl oz,1.0,ml,N,sauces & paste,0.0000,6.75
...,...,...,...,...,...,...,...,...,...,...,...
563,I-3816,24,Bagel - Rosemary,0.25,ea,,,Y,"wheat/rye (bread, pasta, baked goods)",1.5225,14.80
564,I-3817,24,Bagel - Jalapeno Cheddar,0.25,ea,,,Y,"wheat/rye (bread, pasta, baked goods)",1.5225,14.80
565,I-3818,27,ALMOND MILK Barista,14.00,oz (fl),,,Y,almond milk,0.7021,3.05
566,I-3819,28,Oat Milk Barista Blend,14.00,oz (fl),,,Y,oat milk,0.9943,0.68


In [37]:
# Map land footprint factors
mapping = pd.merge(mapping, land_factors.loc[:,['Category ID','Food Category','Land Use (m^2)']], 
                  how = 'left',
                  left_on = 'CategoryID', 
                  right_on = 'Category ID')

for index in mapping.index:
    if np.isnan(mapping.loc[index,'CategoryID']):
        mapping.loc[index,'Land Use (m^2)'] = 0

mapping = mapping.drop(columns=['Category ID', 'Food Category'])
mapping

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category_y,Active Total Supply Chain Emissions (kg CO2 / kg food),g N lost/kg product,Land Use (m^2)
0,I-1971,35,LIMES,0.25,ea,1.0,ea,N,citrus fruit,0.3942,2.70,0.00042
1,I-8228,55,Grey Goose 1.14L,1.00,fl oz,1.0,fl oz,N,water & beverages,0.0000,0.00,0.00000
2,I-15803,31,Red Bull Watermelon,1.00,can,1.0,can,N,other fruits,0.4306,2.70,0.00071
3,I-5505,36,Lettuce - Romaine,0.25,HEAD,1.0,HEAD,N,cabbages and other brassicas (broccoli),0.6220,7.90,0.00230
4,I-8667,58,ITEM GARLIC MAYO,2.00,fl oz,1.0,ml,N,sauces & paste,0.0000,6.75,0.00000
...,...,...,...,...,...,...,...,...,...,...,...,...
563,I-3816,24,Bagel - Rosemary,0.25,ea,,,Y,"wheat/rye (bread, pasta, baked goods)",1.5225,14.80,0.00489
564,I-3817,24,Bagel - Jalapeno Cheddar,0.25,ea,,,Y,"wheat/rye (bread, pasta, baked goods)",1.5225,14.80,0.00489
565,I-3818,27,ALMOND MILK Barista,14.00,oz (fl),,,Y,almond milk,0.7021,3.05,0.00050
566,I-3819,28,Oat Milk Barista Blend,14.00,oz (fl),,,Y,oat milk,0.9943,0.68,0.00077


In [38]:
# Map water footprint factors
# mapping: DataFrame that is used to assign sustainability-related factors (greenhouse gas emissions, nitrogen loss, and 
# water footprint) to food items based on their category and other attributes. 
mapping = pd.merge(mapping, water_factors.loc[:,['Category ID','Food Category','Freshwater Withdrawals (L/FU)', 'Stress-Weighted Water Use (L/FU)']], 
                  how = 'left',
                  left_on = 'CategoryID', 
                  right_on = 'Category ID')

for index in mapping.index:
    if np.isnan(mapping.loc[index,'Category ID']):
        mapping.loc[index,'Freshwater Withdrawals (L/FU)'] = 0
        mapping.loc[index,'Stress-Weighted Water Use (L/FU)'] = 0

mapping = mapping.drop(columns=['Category ID', 'Food Category'])
mapping.drop_duplicates(subset=["ItemId"], inplace=True)
mapping

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category_y,Active Total Supply Chain Emissions (kg CO2 / kg food),g N lost/kg product,Land Use (m^2),Freshwater Withdrawals (L/FU),Stress-Weighted Water Use (L/FU)
0,I-1971,35,LIMES,0.25,ea,1.0,ea,N,citrus fruit,0.3942,2.70,0.00042,37.400,1345.500
1,I-8228,55,Grey Goose 1.14L,1.00,fl oz,1.0,fl oz,N,water & beverages,0.0000,0.00,0.00000,1.000,1.000
2,I-15803,31,Red Bull Watermelon,1.00,can,1.0,can,N,other fruits,0.4306,2.70,0.00071,3.500,4.700
3,I-5505,36,Lettuce - Romaine,0.25,HEAD,1.0,HEAD,N,cabbages and other brassicas (broccoli),0.6220,7.90,0.00230,54.500,2483.400
4,I-8667,58,ITEM GARLIC MAYO,2.00,fl oz,1.0,ml,N,sauces & paste,0.0000,6.75,0.00000,20.225,1134.925
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
563,I-3816,24,Bagel - Rosemary,0.25,ea,,,Y,"wheat/rye (bread, pasta, baked goods)",1.5225,14.80,0.00489,419.200,12821.700
564,I-3817,24,Bagel - Jalapeno Cheddar,0.25,ea,,,Y,"wheat/rye (bread, pasta, baked goods)",1.5225,14.80,0.00489,419.200,12821.700
565,I-3818,27,ALMOND MILK Barista,14.00,oz (fl),,,Y,almond milk,0.7021,3.05,0.00050,455.825,32341.075
566,I-3819,28,Oat Milk Barista Blend,14.00,oz (fl),,,Y,oat milk,0.9943,0.68,0.00077,67.030,2445.630


In [39]:
mapping[mapping["g N lost/kg product"].isnull()]

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category_y,Active Total Supply Chain Emissions (kg CO2 / kg food),g N lost/kg product,Land Use (m^2),Freshwater Withdrawals (L/FU),Stress-Weighted Water Use (L/FU)


In [40]:
mapping[mapping["ItemId"] == "I-4140"]

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category_y,Active Total Supply Chain Emissions (kg CO2 / kg food),g N lost/kg product,Land Use (m^2),Freshwater Withdrawals (L/FU),Stress-Weighted Water Use (L/FU)
474,I-4140,40,CILANTRO 3ct,1.0,bunch,400.0,ml,Y,other vegetables,0.5029,7.9,0.00119,81.3,2939.5


In [41]:
mapping["CategoryID"].isnull().sum()

np.int64(0)

### Manully Adjust Footprint Factor for Specific Items

In [42]:
# # For Manual_Factor: dataframe with items that adjusted GHGe factor manually.
# # It takes the id for Manual_Factor for the index being iterated and if it is equal to item id of mapping dataframe then it sets
# # the values of the columns to the manually adjusted values
# # Note: the values for the columns in mapping DataFrame is adjusted and not Manual_Factor DataFrame
# for index, row in Manual_Factor.iterrows():
#     itemId = Manual_Factor.loc[index, 'ItemId']
#     ghge = Manual_Factor.loc[index, 'Active Total Supply Chain Emissions (kg CO2 / kg food)']
#     nitro = Manual_Factor.loc[index, 'g N lost/kg product']
#     water = Manual_Factor.loc[index, 'Freshwater Withdrawals (L/FU)']
#     land = Manual_Factor.loc[index, 'Land Use (m^2)']
#     str_water = Manual_Factor.loc[index, 'Stress-Weighted Water Use (L/FU)']
#     mapping.loc[mapping['ItemId'] == itemId, 'Active Total Supply Chain Emissions (kg CO2 / kg food)'] = ghge
#     mapping.loc[mapping['ItemId'] == itemId, 'g N lost/kg product'] = nitro
#     mapping.loc[mapping['ItemId'] == itemId, 'Freshwater Withdrawals (L/FU)'] = water
#     mapping.loc[mapping['ItemId'] == itemId, 'Stress-Weighted Water Use (L/FU)'] = str_water
#     mapping.loc[mapping['ItemId'] == itemId, 'Land Use (m^2)'] = land

In [43]:
mapping.drop_duplicates(subset = ['ItemId'], inplace=True)
mapping.dtypes

ItemId                                                     object
CategoryID                                                  int64
Description                                                object
CaseQty                                                   float64
CaseUOM                                                    object
PakQty                                                    float64
PakUOM                                                     object
InventoryGroup                                             object
Food Category_y                                            object
Active Total Supply Chain Emissions (kg CO2 / kg food)    float64
g N lost/kg product                                       float64
Land Use (m^2)                                            float64
Freshwater Withdrawals (L/FU)                             float64
Stress-Weighted Water Use (L/FU)                          float64
dtype: object

In [44]:
mapping.shape

(566, 14)

In [45]:
mapping

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category_y,Active Total Supply Chain Emissions (kg CO2 / kg food),g N lost/kg product,Land Use (m^2),Freshwater Withdrawals (L/FU),Stress-Weighted Water Use (L/FU)
0,I-1971,35,LIMES,0.25,ea,1.0,ea,N,citrus fruit,0.3942,2.70,0.00042,37.400,1345.500
1,I-8228,55,Grey Goose 1.14L,1.00,fl oz,1.0,fl oz,N,water & beverages,0.0000,0.00,0.00000,1.000,1.000
2,I-15803,31,Red Bull Watermelon,1.00,can,1.0,can,N,other fruits,0.4306,2.70,0.00071,3.500,4.700
3,I-5505,36,Lettuce - Romaine,0.25,HEAD,1.0,HEAD,N,cabbages and other brassicas (broccoli),0.6220,7.90,0.00230,54.500,2483.400
4,I-8667,58,ITEM GARLIC MAYO,2.00,fl oz,1.0,ml,N,sauces & paste,0.0000,6.75,0.00000,20.225,1134.925
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
563,I-3816,24,Bagel - Rosemary,0.25,ea,,,Y,"wheat/rye (bread, pasta, baked goods)",1.5225,14.80,0.00489,419.200,12821.700
564,I-3817,24,Bagel - Jalapeno Cheddar,0.25,ea,,,Y,"wheat/rye (bread, pasta, baked goods)",1.5225,14.80,0.00489,419.200,12821.700
565,I-3818,27,ALMOND MILK Barista,14.00,oz (fl),,,Y,almond milk,0.7021,3.05,0.00050,455.825,32341.075
566,I-3819,28,Oat Milk Barista Blend,14.00,oz (fl),,,Y,oat milk,0.9943,0.68,0.00077,67.030,2445.630


In [46]:
mapping[mapping["ItemId"] == "I-1874"]

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category_y,Active Total Supply Chain Emissions (kg CO2 / kg food),g N lost/kg product,Land Use (m^2),Freshwater Withdrawals (L/FU),Stress-Weighted Water Use (L/FU)
16,I-1874,38,GARLIC WHOLE PEELED,200.0,g,30.0,oz,N,root vegetables,0.3062,7.9,0.00032,9.9,37.9


In [47]:
mapping.to_csv("mapping.csv")

In [48]:
ingredients = pd.read_csv("data/preprocessed/AMS_data/Ingredients_List.csv")
ingredients

Unnamed: 0,IngredientId,Qty,Uom,Recipe
0,I-4099,60.0,ml,P-2824
1,I-2402,300.0,g,P-2824
2,P-9935,14.0,ea,P-2824
3,I-4104,6.0,oz (wt),P-5012
4,P-5506,50.0,g,P-5012
...,...,...,...,...
343,P-1520,1.0,ea,R-9786
344,P-9279,1.0,ea,R-8350
345,P-4811,1.0,ea,R-3296
346,I-4327,9.0,g,R-4006


In [49]:
map_list = mapping["ItemId"].unique()
absent_list = []

for item in ingredients["IngredientId"].unique():
    if item not in map_list and not item.startswith("P-"):
        absent_list.append(item)

# absent_list contains the IngredientIds that are not present in mapping but are present in the ingredients DataFrame
print(absent_list)


['I-4145', 'I-4146', 'I-4150', 'I-4151', 'I-4191', 'I-4214', 'I-4215', 'I-4223', 'I-4230', 'I-4239', 'I-4240', 'I-4241', 'I-4242', 'I-4243', 'I-4244', 'I-4245', 'I-4246', 'I-4247', 'I-4248', 'I-4250', 'I-4251', 'I-4252', 'I-4253', 'I-4254', 'I-4255', 'I-4256', 'I-4258', 'I-4259', 'I-4260', 'I-4261', 'I-4262', 'I-4263', 'I-4264', 'I-4265', 'I-4266', 'I-4267', 'I-4268', 'I-4269', 'I-4270', 'I-4271', 'I-4272', 'I-4273', 'I-4274', 'I-4275', 'I-4276', 'I-4277', 'I-4278', 'I-4279', 'I-4280', 'I-4281', 'I-4282', 'I-4283', 'I-4284', 'I-4285', 'I-4286', 'I-4287', 'I-4288', 'I-4289', 'I-4291', 'I-4292', 'I-4293', 'I-4294', 'I-4295', 'I-4297', 'I-4298', 'I-4299', 'I-4300', 'I-4301', 'I-4302', 'I-4303', 'I-4304', 'I-4305', 'I-4306', 'I-4307', 'I-4308', 'I-4309', 'I-4310', 'I-4311', 'I-4312', 'I-4313', 'I-4314', 'I-4315', 'I-4316', 'I-4317', 'I-4318', 'I-4319', 'I-4320', 'I-4321', 'I-4322', 'I-4323', 'I-4324', 'I-4325', 'I-4326', 'I-4327', 'I-4328']


In [50]:
for x in absent_list:
    print(ingredients[ingredients["IngredientId"] == x])

    IngredientId  Qty Uom  Recipe
88        I-4145  1.0  ea  P-4257
163       I-4145  1.0  ea  P-4257
    IngredientId   Qty Uom  Recipe
89        I-4146  10.0   g  P-4257
164       I-4146  10.0   g  P-4257
    IngredientId  Qty Uom  Recipe
93        I-4150  1.0  ea  P-7873
168       I-4150  1.0  ea  P-7873
    IngredientId   Qty Uom  Recipe
94        I-4151  15.0   g  P-4611
169       I-4151  15.0   g  P-4611
    IngredientId  Qty Uom  Recipe
185       I-4191  2.0  ea  R-2796
    IngredientId   Qty Uom  Recipe
187       I-4214  30.0  ml  R-2796
    IngredientId   Qty Uom  Recipe
188       I-4215  60.0   g  R-2796
    IngredientId  Qty    Uom  Recipe
189       I-4223  1.0  slice  R-3504
    IngredientId  Qty Uom   Recipe
191       I-4230  1.0  ea   R-3621
202       I-4230  1.0  ea  R-10771
213       I-4230  1.0  ea  R-10763
    IngredientId    Qty Uom   Recipe
201       I-4239  120.0   g  R-10771
209       I-4239   60.0   g  R-10763
301       I-4239   60.0   g   R-4770
    IngredientId

In [51]:
# Converts mapping DataFrame to the Mapping.csv file
path = os.path.join(os.getcwd(), "data", "mapping", "AMS_data", "Mapping.csv")
mapping.to_csv(path, index = False, header = True)