# Climate-Friendly Food Systems (CFFS) Labelling Project

### The University of British Columbia

***

## Part III: Update Information and Mapping

## Set up and Import Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import glob
import os
import xml.etree.ElementTree as et
from xml.etree.ElementTree import parse
from datetime import datetime

In [2]:
# RUN ONLY ONCE
# os.chdir is used to change the current directory to the specified path
os.chdir("../") # Sets path to the repo folder as it is one level above where this file exists!
path = os.getcwd()
print(path)

/Users/vivaanwadhwa/Documents/GitHub/CFFS_sharon_2024


***
## Import Preprocessed Datasets

In [3]:
# Creates a DataFrame from reading a csv file 
Preps = pd.read_csv(os.path.join(os.getcwd(), "data", "cleaning", "AMS_data", "Preps_Unit_Cleaned.csv"))
Preps.head()

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup,StdQty,StdUom
0,P-17305,2022 Hummus prep,1600.0,g,N,1600.0,g
1,P-16793,2022 Pulled Pork Prep,6.0,Kg,Y,6000.0,g
2,P-18380,2023 Babaganoush Prep,750.0,g,N,750.0,g
3,P-18458,2023 Fresh burger Patty prep,2.6,Kg,Y,2600.0,g
4,P-18575,2023 Wings Hot sauce prep,1650.0,g,Y,1650.0,g


In [4]:
# Creates a DataFrame from reading a csv file 
ghge_factors = pd.read_csv(os.path.join(os.getcwd(), "data", "external", "ghge_factors.csv"))
ghge_factors.head()

Unnamed: 0,Category ID,Food Category,Active Total Supply Chain Emissions (kg CO2 / kg food)
0,1,beef & buffalo meat,41.3463
1,2,lamb/mutton & goat meat,41.6211
2,3,pork (pig meat),9.8315
3,4,"poultry (chicken, turkey)",4.3996
4,5,butter,11.4316


In [5]:
# Creates a DataFrame from reading a csv file 
nitro_factors = pd.read_csv(os.path.join(os.getcwd(), "data", "external", "nitrogen_factors.csv"))
nitro_factors.head()

Unnamed: 0,Category ID,Food Category,g N lost/kg product
0,1,beef & buffalo meat,329.5
1,2,lamb/mutton & goat meat,231.15
2,3,pork (pig meat),132.8
3,4,"poultry (chicken, turkey)",116.8
4,5,butter,100.35


In [6]:
# Creates a DataFrame from reading a csv file 
water_factors = pd.read_csv(os.path.join(os.getcwd(), "data", "external", "water_factors.csv"))
water_factors.head()

Unnamed: 0,Category ID,Food Category,Freshwater Withdrawals (L/FU),Stress-Weighted Water Use (L/FU)
0,1,beef & buffalo meat,1677.2,61309.0
1,2,lamb/mutton & goat meat,461.2,258.9
2,3,pork (pig meat),1810.3,54242.7
3,4,"poultry (chicken, turkey)",370.3,333.5
4,5,butter,1010.176,50055.168


In [7]:
# Creates a DataFrame from reading a csv file 
land_factors = pd.read_csv(os.path.join(os.getcwd(), "data", "external", "land_factors.csv"))
land_factors.rename(columns={'km^2 land use/kg product': 'Land Use (m^2)'}, inplace=True)
land_factors['Land Use (m^2)'] *= 1000
land_factors.head()

Unnamed: 0,Category ID,Food Category,Land Use (m^2)
0,1,beef & buffalo meat,0.12645
1,2,lamb/mutton & goat meat,0.1432
2,3,pork (pig meat),0.02102
3,4,"poultry (chicken, turkey)",0.01151
4,5,butter,0.01395


In [8]:
# Load current Items List with assigned Emission Factors Category ID
Items_Assigned = pd.read_csv(os.path.join(os.getcwd(), "data", "mapping", "AMS_data", "Items_List_Assigned.csv"))
Items_Assigned.head()

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup
0,I-1971,35,LIMES,0.25,ea,1.0,ea,N
1,I-8228,55,Grey Goose 1.14L,1.0,fl oz,1.0,fl oz,N
2,I-15803,31,Red Bull Watermelon,1.0,can,1.0,can,N
3,I-5505,36,Lettuce - Romaine,0.25,HEAD,1.0,HEAD,N
4,I-8667,58,ITEM GARLIC MAYO,2.0,fl oz,1.0,ml,N


In [11]:
# new_items = pd.read_csv("data/mapping/new items added/New_Items_Added_11.csv")
# new_items = pd.read_csv("data/mapping/new items added/New_Items_2023/New_Items_Added_2023_08_01.csv")
# CHECK FOR CHANGES REQUIRES

# # COMMENTED OUT LINES BELOW, commented it out this time because no new items were added

# new_items = pd.read_csv("data/mapping/new items added/AMS_data/New_Items_2023/New_Items_Added_2023-11-28.csv")
# new_items

In [9]:
item_nonstd = pd.read_csv("data/cleaning/AMS_data/Items_Nonstd.csv")
item_nonstd

Unnamed: 0,IngredientId,Qty,Uom,Recipe,Description
0,I-1971,0.250,ea,R-17284,LIMES
1,I-15803,1.000,can,R-17284,Red Bull Watermelon
2,I-5505,0.250,HEAD,R-18292,Lettuce - Romaine
3,I-11706,1.000,ea,R-18292,Glry Side Fries 2023
4,I-13308,1.000,ea,R-18292,"TORTILLA 12"" FLOUR PRESSED"
...,...,...,...,...,...
129,I-12339,1.000,PORT,P-3173,rice vinegar
130,I-2586,0.005,tank,R-13392,CO2 Tank
131,I-9186,1.000,ea,R-15315,"9""x6 1/8"" NAT HINGED CONTAINER"
132,I-15425,1.000,PORT,R-15426,G21 Southwest Tofu Scrambled


In [10]:
preps_nonstd = pd.read_csv("data/cleaning/AMS_data/Preps_NonstdUom.csv")
preps_nonstd

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup,StdQty,StdUom
0,P-14356,[PREP KAPPA MAKI,6.0,PORT,N,,
1,P-14560,2022 Caesar Wrap prep,1.0,ea,N,,
2,P-9003,2022 Gallery Burger prep,1.0,ea,N,,
3,P-17358,2022 Poutine Prep,1.0,PORT,N,,
4,P-15006,2022 Power Punch Salad prep,1.0,PORT,N,,
5,P-16795,2022 Pulled Pork Sandwich prep,1.0,PORT,N,,
6,P-14552,2022 Vegan Pulled Pork Prep,1.0,ea,N,,
7,P-18327,2023 Appi Platter prep,1.0,PORT,N,,
8,P-18453,2023 GM Tempeh curry prep,1.0,PORT,N,,
9,P-18451,2023 Gm truffle Fries prep,1.0,PORT,N,,


In [11]:
conv_updatecov = pd.read_csv("data/cleaning/update/AMS_data/Conv_UpdateConv.csv")
conv_updatecov

Unnamed: 0,ConversionId,Multiplier,ConvertFromQty,ConvertFromUom,ConvertToQty,ConvertToUom
0,I-1971,0.003731,0.250,ea,16.750,g
1,I-15803,0.004000,1.000,can,250.000,ml
2,I-5505,0.001263,0.250,HEAD,49.500,g
3,I-11706,0.008547,1.000,ea,117.000,g
4,I-13308,0.024540,1.000,ea,40.750,g
...,...,...,...,...,...,...
129,I-12339,0.066667,1.000,PORT,15.000,g
130,I-2586,0.000000,0.005,tank,0.000,g
131,I-9186,0.000000,1.000,ea,0.000,g
132,I-15425,0.010989,1.000,PORT,91.000,g


# Update Conversion_Added.csv

In [12]:
conversions_added = pd.read_csv(os.path.join(os.getcwd(), "data", "cleaning", "update", "AMS_data", "Conversions_Added.csv"))
conversions_added

Unnamed: 0,ConversionId,Multiplier,ConvertFromQty,ConvertFromUom,ConvertToQty,ConvertToUom


In [13]:
# Concatenate the DataFrames
combined_df = pd.concat([conv_updatecov, conversions_added], ignore_index=True)
combined_df

  combined_df = pd.concat([conv_updatecov, conversions_added], ignore_index=True)


Unnamed: 0,ConversionId,Multiplier,ConvertFromQty,ConvertFromUom,ConvertToQty,ConvertToUom
0,I-1971,0.003731,0.250,ea,16.750,g
1,I-15803,0.004000,1.000,can,250.000,ml
2,I-5505,0.001263,0.250,HEAD,49.500,g
3,I-11706,0.008547,1.000,ea,117.000,g
4,I-13308,0.024540,1.000,ea,40.750,g
...,...,...,...,...,...,...
129,I-12339,0.066667,1.000,PORT,15.000,g
130,I-2586,0.000000,0.005,tank,0.000,g
131,I-9186,0.000000,1.000,ea,0.000,g
132,I-15425,0.010989,1.000,PORT,91.000,g


In [14]:
# Add the conv_updatecov file to the masterfile, Conversions_Added
path = os.path.join(os.getcwd(), "data", "cleaning", "AMS_data", "Conversions_Added.csv")
combined_df.to_csv(path, index = False, header = True)

***
## Import Update Info

In [15]:
# Import list of prep that need convert uom to standard uom manually
Manual_PrepU = pd.read_csv(os.path.join(os.getcwd(), "data", "cleaning", "update", "AMS_data", "Preps_UpdateUom.csv"))
# Here we can see that UOM examples are: each, ea, slice
Manual_PrepU.head()

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup,StdQty,StdUom
0,P-14356,[PREP KAPPA MAKI,6,PORT,N,1511.82,g
1,P-14560,2022 Caesar Wrap prep,1,ea,N,433.59,g
2,P-9003,2022 Gallery Burger prep,1,ea,N,501.82,g
3,P-17358,2022 Poutine Prep,1,PORT,N,705.8,g
4,P-15006,2022 Power Punch Salad prep,1,PORT,N,416.73,g


In [16]:
# The row of the DatFrame that contains the PrepId == P-15006
Manual_PrepU.loc[Manual_PrepU["PrepId"] == "P-15006"]

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup,StdQty,StdUom
4,P-15006,2022 Power Punch Salad prep,1,PORT,N,416.73,g


In [20]:
# Select the file path for new items list with category id
# New_Items_Added = pd.read_csv(os.path.join(os.getcwd(), "data", "mapping", "new items added", "New_Items_Added_10.csv"))
# New_Items_Added.tail(15)

# CHECK FOR CHANGES REQUIRES


# # COMMENTED OUT LINES BELOW, commented it out this time because no new items were added

# New_Items_Added = pd.read_csv(os.path.join(os.getcwd(), "data", "mapping", "new items added", "AMS_data", "New_Items_2023", "New_Items_Added_2023-11-28.csv"))
# New_Items_Added.tail(15)

In [21]:
# # Import list of items that adjusted GHGe factor manually
# Manual_Factor = pd.read_csv(os.path.join(os.getcwd(), "data", "mapping", "Manual_Adjust_Factors.csv"))
# Manual_Factor['Land Use (m^2)'] = 0
# Manual_Factor.head()

### Update Correct Uom for Preps

In [17]:
# Update prep list with manually adjusted uom
for index, row in Manual_PrepU.iterrows():
    PrepId = Manual_PrepU.loc[index, 'PrepId']
    qty = Manual_PrepU.loc[index, 'StdQty']
    uom = Manual_PrepU.loc[index, 'StdUom']
    Preps.loc[Preps['PrepId'] == PrepId, 'StdQty'] = qty
    Preps.loc[Preps['PrepId'] == PrepId, 'StdUom'] = uom

In [18]:
Preps.drop_duplicates(subset=['PrepId'], inplace=True,)

In [19]:
Preps.head()

Unnamed: 0,PrepId,Description,PakQty,PakUOM,InventoryGroup,StdQty,StdUom
0,P-17305,2022 Hummus prep,1600.0,g,N,1600.0,g
1,P-16793,2022 Pulled Pork Prep,6.0,Kg,Y,6000.0,g
2,P-18380,2023 Babaganoush Prep,750.0,g,N,750.0,g
3,P-18458,2023 Fresh burger Patty prep,2.6,Kg,Y,2600.0,g
4,P-18575,2023 Wings Hot sauce prep,1650.0,g,Y,1650.0,g


In [20]:
Preps.shape

(24, 7)

In [21]:
path = os.path.join(os.getcwd(), "data", "cleaning", "AMS_data", "Preps_List_Cleaned.csv")
Preps.to_csv(path, index = False, header = True)

In [22]:
Items_Assigned.head()

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup
0,I-1971,35,LIMES,0.25,ea,1.0,ea,N
1,I-8228,55,Grey Goose 1.14L,1.0,fl oz,1.0,fl oz,N
2,I-15803,31,Red Bull Watermelon,1.0,can,1.0,can,N
3,I-5505,36,Lettuce - Romaine,0.25,HEAD,1.0,HEAD,N
4,I-8667,58,ITEM GARLIC MAYO,2.0,fl oz,1.0,ml,N


In [23]:
# New_Items_Added.head()

### Import List of New Items with Emission Factors Category ID Assigned

In [24]:
# REMOVED New_Items_Added from frame right now because no new items added. Change this when adding new items (Feb 1 2024)
# frames = [Items_Assigned, New_Items_Added]
# Items_Assigned_Updated = pd.concat(frames).reset_index(drop=True, inplace=False).drop_duplicates()
# Items_Assigned_Updated.head()

Items_Assigned_Updated = Items_Assigned.drop_duplicates()
Items_Assigned_Updated.head()

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup
0,I-1971,35,LIMES,0.25,ea,1.0,ea,N
1,I-8228,55,Grey Goose 1.14L,1.0,fl oz,1.0,fl oz,N
2,I-15803,31,Red Bull Watermelon,1.0,can,1.0,can,N
3,I-5505,36,Lettuce - Romaine,0.25,HEAD,1.0,HEAD,N
4,I-8667,58,ITEM GARLIC MAYO,2.0,fl oz,1.0,ml,N


In [25]:
Items_Assigned_Updated

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup
0,I-1971,35,LIMES,0.250,ea,1.0,ea,N
1,I-8228,55,Grey Goose 1.14L,1.000,fl oz,1.0,fl oz,N
2,I-15803,31,Red Bull Watermelon,1.000,can,1.0,can,N
3,I-5505,36,Lettuce - Romaine,0.250,HEAD,1.0,HEAD,N
4,I-8667,58,ITEM GARLIC MAYO,2.000,fl oz,1.0,ml,N
...,...,...,...,...,...,...,...,...
451,I-2586,61,CO2 Tank,0.005,tank,1.0,tank,N
452,I-9186,61,"9""x6 1/8"" NAT HINGED CONTAINER",1.000,ea,1.0,ea,N
453,I-14843,54,PieR Item Wild Mushroom Mix,2.000,oz,1.0,g,N
454,I-2432,40,VEG BURGER GARDEN 4Z,1.000,ea,1.0,ea,N


In [26]:
Items_Assigned_Updated.shape

(456, 8)

In [27]:
# Double brackets used to specify the column as a dataframe and not a series
# converting the 'CategoryID' column in the Items_Assigned_Updated dataframe to numeric data type using the pd.to_numeric()
# use the apply column to apply the coversion to the entire column
Items_Assigned_Updated[['CategoryID']] = Items_Assigned_Updated[['CategoryID']].apply(pd.to_numeric)

In [28]:
Items_Assigned_Updated

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup
0,I-1971,35,LIMES,0.250,ea,1.0,ea,N
1,I-8228,55,Grey Goose 1.14L,1.000,fl oz,1.0,fl oz,N
2,I-15803,31,Red Bull Watermelon,1.000,can,1.0,can,N
3,I-5505,36,Lettuce - Romaine,0.250,HEAD,1.0,HEAD,N
4,I-8667,58,ITEM GARLIC MAYO,2.000,fl oz,1.0,ml,N
...,...,...,...,...,...,...,...,...
451,I-2586,61,CO2 Tank,0.005,tank,1.0,tank,N
452,I-9186,61,"9""x6 1/8"" NAT HINGED CONTAINER",1.000,ea,1.0,ea,N
453,I-14843,54,PieR Item Wild Mushroom Mix,2.000,oz,1.0,g,N
454,I-2432,40,VEG BURGER GARDEN 4Z,1.000,ea,1.0,ea,N


In [29]:
path = os.path.join(os.getcwd(), "data", "mapping", "AMS_data", "Items_List_Assigned.csv")
Items_Assigned_Updated.to_csv(path, index = False, header = True)

## Mapping Items to Footprint Factors

In [30]:
# DEFINITION OF MAPPING: assigning certain attributes to data points based on criteria or predefined rules.

# Map GHG footprint factors
# merges two data frames Items_Assigned_Updated and ghge_factors on the columns CategoryID and Category ID, respectively. The 
# resulting data frame is stored in mapping.

# how='left' specifies that a left join is done between Items_Assigned_Updated and ghge_factors.
# left_on='CategoryID' specifies that the join condition for Items_Assigned_Updated should be based on the 'CategoryID' column.
# right_on='Category ID' specifies that the join condition for ghge_factors should be based on the 'Category ID' column.

mapping = pd.merge(Items_Assigned_Updated, ghge_factors.loc[:,['Category ID','Food Category','Active Total Supply Chain Emissions (kg CO2 / kg food)']], 
                  how = 'left',
                  left_on = 'CategoryID', 
                  right_on = 'Category ID')
# if row in mapping has CategoryId == NaN then it assigns the value of the column Active Total Supply Chain Emissions (kg CO2 / kg food)
# to zero
for index in mapping.index:
    if np.isnan(mapping.loc[index,'Category ID']):
        mapping.loc[index,'Active Total Supply Chain Emissions (kg CO2 / kg food)'] = 0
# drops the columns Category ID and Food Category
mapping = mapping.drop(columns=['Category ID', 'Food Category'])
mapping

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Active Total Supply Chain Emissions (kg CO2 / kg food)
0,I-1971,35,LIMES,0.250,ea,1.0,ea,N,0.3942
1,I-8228,55,Grey Goose 1.14L,1.000,fl oz,1.0,fl oz,N,0.0000
2,I-15803,31,Red Bull Watermelon,1.000,can,1.0,can,N,0.4306
3,I-5505,36,Lettuce - Romaine,0.250,HEAD,1.0,HEAD,N,0.6220
4,I-8667,58,ITEM GARLIC MAYO,2.000,fl oz,1.0,ml,N,0.0000
...,...,...,...,...,...,...,...,...,...
451,I-2586,61,CO2 Tank,0.005,tank,1.0,tank,N,0.0000
452,I-9186,61,"9""x6 1/8"" NAT HINGED CONTAINER",1.000,ea,1.0,ea,N,0.0000
453,I-14843,54,PieR Item Wild Mushroom Mix,2.000,oz,1.0,g,N,9.3703
454,I-2432,40,VEG BURGER GARDEN 4Z,1.000,ea,1.0,ea,N,0.5029


In [31]:
# Map nitrogen footprint factors
mapping = pd.merge(mapping, nitro_factors.loc[:,['Category ID','Food Category','g N lost/kg product']], 
                  how = 'left',
                  left_on = 'CategoryID', 
                  right_on = 'Category ID')

for index in mapping.index:
    if np.isnan(mapping.loc[index,'Category ID']):
        mapping.loc[index,'g N lost/kg product'] = 0

mapping = mapping.drop(columns=['Category ID', 'Food Category'])
mapping

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Active Total Supply Chain Emissions (kg CO2 / kg food),g N lost/kg product
0,I-1971,35,LIMES,0.250,ea,1.0,ea,N,0.3942,2.70
1,I-8228,55,Grey Goose 1.14L,1.000,fl oz,1.0,fl oz,N,0.0000,0.00
2,I-15803,31,Red Bull Watermelon,1.000,can,1.0,can,N,0.4306,2.70
3,I-5505,36,Lettuce - Romaine,0.250,HEAD,1.0,HEAD,N,0.6220,7.90
4,I-8667,58,ITEM GARLIC MAYO,2.000,fl oz,1.0,ml,N,0.0000,6.75
...,...,...,...,...,...,...,...,...,...,...
451,I-2586,61,CO2 Tank,0.005,tank,1.0,tank,N,0.0000,0.00
452,I-9186,61,"9""x6 1/8"" NAT HINGED CONTAINER",1.000,ea,1.0,ea,N,0.0000,0.00
453,I-14843,54,PieR Item Wild Mushroom Mix,2.000,oz,1.0,g,N,9.3703,6.75
454,I-2432,40,VEG BURGER GARDEN 4Z,1.000,ea,1.0,ea,N,0.5029,7.90


In [32]:
# Map land footprint factors
mapping = pd.merge(mapping, land_factors.loc[:,['Category ID','Food Category','Land Use (m^2)']], 
                  how = 'left',
                  left_on = 'CategoryID', 
                  right_on = 'Category ID')

for index in mapping.index:
    if np.isnan(mapping.loc[index,'Category ID']):
        mapping.loc[index,'Land Use (m^2)'] = 0

mapping = mapping.drop(columns=['Category ID', 'Food Category'])
mapping

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Active Total Supply Chain Emissions (kg CO2 / kg food),g N lost/kg product,Land Use (m^2)
0,I-1971,35,LIMES,0.250,ea,1.0,ea,N,0.3942,2.70,0.00042
1,I-8228,55,Grey Goose 1.14L,1.000,fl oz,1.0,fl oz,N,0.0000,0.00,0.00000
2,I-15803,31,Red Bull Watermelon,1.000,can,1.0,can,N,0.4306,2.70,0.00071
3,I-5505,36,Lettuce - Romaine,0.250,HEAD,1.0,HEAD,N,0.6220,7.90,0.00230
4,I-8667,58,ITEM GARLIC MAYO,2.000,fl oz,1.0,ml,N,0.0000,6.75,0.00000
...,...,...,...,...,...,...,...,...,...,...,...
451,I-2586,61,CO2 Tank,0.005,tank,1.0,tank,N,0.0000,0.00,0.00000
452,I-9186,61,"9""x6 1/8"" NAT HINGED CONTAINER",1.000,ea,1.0,ea,N,0.0000,0.00,0.00000
453,I-14843,54,PieR Item Wild Mushroom Mix,2.000,oz,1.0,g,N,9.3703,6.75,0.03100
454,I-2432,40,VEG BURGER GARDEN 4Z,1.000,ea,1.0,ea,N,0.5029,7.90,0.00119


In [33]:
# Map water footprint factors
# mapping: DataFrame that is used to assign sustainability-related factors (greenhouse gas emissions, nitrogen loss, and 
# water footprint) to food items based on their category and other attributes. 
mapping = pd.merge(mapping, water_factors.loc[:,['Category ID','Food Category','Freshwater Withdrawals (L/FU)', 'Stress-Weighted Water Use (L/FU)']], 
                  how = 'left',
                  left_on = 'CategoryID', 
                  right_on = 'Category ID')

for index in mapping.index:
    if np.isnan(mapping.loc[index,'Category ID']):
        mapping.loc[index,'Freshwater Withdrawals (L/FU)'] = 0
        mapping.loc[index,'Stress-Weighted Water Use (L/FU)'] = 0

mapping = mapping.drop(columns=['Category ID', 'Food Category'])
mapping.drop_duplicates(subset=["ItemId"], inplace=True)
mapping

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Active Total Supply Chain Emissions (kg CO2 / kg food),g N lost/kg product,Land Use (m^2),Freshwater Withdrawals (L/FU),Stress-Weighted Water Use (L/FU)
0,I-1971,35,LIMES,0.250,ea,1.0,ea,N,0.3942,2.70,0.00042,37.400,1345.500
1,I-8228,55,Grey Goose 1.14L,1.000,fl oz,1.0,fl oz,N,0.0000,0.00,0.00000,1.000,1.000
2,I-15803,31,Red Bull Watermelon,1.000,can,1.0,can,N,0.4306,2.70,0.00071,3.500,4.700
3,I-5505,36,Lettuce - Romaine,0.250,HEAD,1.0,HEAD,N,0.6220,7.90,0.00230,54.500,2483.400
4,I-8667,58,ITEM GARLIC MAYO,2.000,fl oz,1.0,ml,N,0.0000,6.75,0.00000,20.225,1134.925
...,...,...,...,...,...,...,...,...,...,...,...,...,...
451,I-2586,61,CO2 Tank,0.005,tank,1.0,tank,N,0.0000,0.00,0.00000,0.000,0.000
452,I-9186,61,"9""x6 1/8"" NAT HINGED CONTAINER",1.000,ea,1.0,ea,N,0.0000,0.00,0.00000,0.000,0.000
453,I-14843,54,PieR Item Wild Mushroom Mix,2.000,oz,1.0,g,N,9.3703,6.75,0.03100,24.900,220.300
454,I-2432,40,VEG BURGER GARDEN 4Z,1.000,ea,1.0,ea,N,0.5029,7.90,0.00119,81.300,2939.500


In [34]:
mapping[mapping["ItemId"] == "I-1874"]

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Active Total Supply Chain Emissions (kg CO2 / kg food),g N lost/kg product,Land Use (m^2),Freshwater Withdrawals (L/FU),Stress-Weighted Water Use (L/FU)
16,I-1874,38,GARLIC WHOLE PEELED,200.0,g,30.0,oz,N,0.3062,7.9,0.00032,9.9,37.9


In [35]:
mapping["CategoryID"].isnull().sum()

0

### Manully Adjust Footprint Factor for Specific Items

In [36]:
# # For Manual_Factor: dataframe with items that adjusted GHGe factor manually.
# # It takes the id for Manual_Factor for the index being iterated and if it is equal to item id of mapping dataframe then it sets
# # the values of the columns to the manually adjusted values
# # Note: the values for the columns in mapping DataFrame is adjusted and not Manual_Factor DataFrame
# for index, row in Manual_Factor.iterrows():
#     itemId = Manual_Factor.loc[index, 'ItemId']
#     ghge = Manual_Factor.loc[index, 'Active Total Supply Chain Emissions (kg CO2 / kg food)']
#     nitro = Manual_Factor.loc[index, 'g N lost/kg product']
#     water = Manual_Factor.loc[index, 'Freshwater Withdrawals (L/FU)']
#     land = Manual_Factor.loc[index, 'Land Use (m^2)']
#     str_water = Manual_Factor.loc[index, 'Stress-Weighted Water Use (L/FU)']
#     mapping.loc[mapping['ItemId'] == itemId, 'Active Total Supply Chain Emissions (kg CO2 / kg food)'] = ghge
#     mapping.loc[mapping['ItemId'] == itemId, 'g N lost/kg product'] = nitro
#     mapping.loc[mapping['ItemId'] == itemId, 'Freshwater Withdrawals (L/FU)'] = water
#     mapping.loc[mapping['ItemId'] == itemId, 'Stress-Weighted Water Use (L/FU)'] = str_water
#     mapping.loc[mapping['ItemId'] == itemId, 'Land Use (m^2)'] = land

In [37]:
mapping.drop_duplicates(subset = ['ItemId'], inplace=True)
mapping.dtypes

ItemId                                                     object
CategoryID                                                  int64
Description                                                object
CaseQty                                                   float64
CaseUOM                                                    object
PakQty                                                    float64
PakUOM                                                     object
InventoryGroup                                             object
Active Total Supply Chain Emissions (kg CO2 / kg food)    float64
g N lost/kg product                                       float64
Land Use (m^2)                                            float64
Freshwater Withdrawals (L/FU)                             float64
Stress-Weighted Water Use (L/FU)                          float64
dtype: object

In [38]:
mapping.shape

(456, 13)

In [39]:
mapping

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Active Total Supply Chain Emissions (kg CO2 / kg food),g N lost/kg product,Land Use (m^2),Freshwater Withdrawals (L/FU),Stress-Weighted Water Use (L/FU)
0,I-1971,35,LIMES,0.250,ea,1.0,ea,N,0.3942,2.70,0.00042,37.400,1345.500
1,I-8228,55,Grey Goose 1.14L,1.000,fl oz,1.0,fl oz,N,0.0000,0.00,0.00000,1.000,1.000
2,I-15803,31,Red Bull Watermelon,1.000,can,1.0,can,N,0.4306,2.70,0.00071,3.500,4.700
3,I-5505,36,Lettuce - Romaine,0.250,HEAD,1.0,HEAD,N,0.6220,7.90,0.00230,54.500,2483.400
4,I-8667,58,ITEM GARLIC MAYO,2.000,fl oz,1.0,ml,N,0.0000,6.75,0.00000,20.225,1134.925
...,...,...,...,...,...,...,...,...,...,...,...,...,...
451,I-2586,61,CO2 Tank,0.005,tank,1.0,tank,N,0.0000,0.00,0.00000,0.000,0.000
452,I-9186,61,"9""x6 1/8"" NAT HINGED CONTAINER",1.000,ea,1.0,ea,N,0.0000,0.00,0.00000,0.000,0.000
453,I-14843,54,PieR Item Wild Mushroom Mix,2.000,oz,1.0,g,N,9.3703,6.75,0.03100,24.900,220.300
454,I-2432,40,VEG BURGER GARDEN 4Z,1.000,ea,1.0,ea,N,0.5029,7.90,0.00119,81.300,2939.500


In [40]:
ingredients = pd.read_csv("data/preprocessed/AMS_data/Ingredients_List.csv")
ingredients

Unnamed: 0,IngredientId,Qty,Uom,Recipe
0,I-1971,0.25,ea,R-17284
1,I-8228,1.00,fl oz,R-17284
2,I-15803,1.00,can,R-17284
3,I-5505,0.25,HEAD,R-18292
4,I-8667,2.00,fl oz,R-18292
...,...,...,...,...
2153,I-2262,1.00,g,P-18049
2154,I-14434,80.00,g,P-18049
2155,I-15477,1.00,PORT,P-18049
2156,I-15637,20.00,g,P-18049


In [41]:
map_list = mapping["ItemId"].unique()
absent_list = []

for item in ingredients["IngredientId"].unique():
    if item not in map_list:
        absent_list.append(item)

# absent_list contains the IngredientIds that are not present in mapping but are present in the ingredients DataFrame
print(absent_list)

['I-11706', 'I-16780', 'I-14715', 'I-15427', 'I-13956', 'P-14372', 'P-15606', 'I-16221', 'I-16572', 'I-6243', 'I-17039', 'I-17040', 'I-9013', 'I-13414', 'I-17486', 'I-16570', 'I-16574', 'I-16575', 'I-12266', 'I-17316', 'I-16787', 'P-5814', 'I-17037', 'I-11125', 'I-14905', 'I-15024', 'I-14826', 'I-16856', 'I-16748', 'P-18335', 'I-16844', 'I-17314', 'I-16223', 'I-14504', 'I-15477', 'I-9017', 'P-15013', 'I-16760', 'I-16778', 'I-17352', 'P-15419', 'P-17305', 'I-17378', 'I-17360', 'I-15051', 'I-16571', 'P-16793', 'I-15007', 'I-15008', 'I-15010', 'I-16794', 'I-16860', 'I-16784', 'I-16785', 'I-16786', 'I-16834', 'I-12209', 'I-16789', 'I-18349', 'I-16857', 'I-17350', 'I-15001', 'P-6811', 'I-15486', 'I-16855', 'P-14551', 'I-15401', 'I-16777', 'I-5816', 'P-18381', 'I-15037', 'I-16862', 'I-17013', 'I-17014', 'P-18336', 'I-17941', 'P-18380', 'P-18458', 'I-18274', 'P-18530', 'P-18052', 'I-15038', 'I-15412', 'I-14984', 'I-16994', 'I-18317', 'I-18268', 'I-14150', 'P-18321', 'P-18329', 'I-7119', 'I-14

In [42]:
# Converts mapping DataFrame to the Mapping.csv file
path = os.path.join(os.getcwd(), "data", "mapping", "AMS_data", "Mapping.csv")
mapping.to_csv(path, index = False, header = True)