# Climate-Friendly Food Systems (CFFS) Labelling Project

### The University of British Columbia

****

## Adding Description to Conversions

## Set up and Import Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import glob
import os
import csv
from itertools import islice
from decimal import Decimal
import xml.etree.ElementTree as et
from xml.etree.ElementTree import parse
import openpyxl
import pytest

In [2]:
# Move the working directory up to the repo folder, at most once per kernel session.
# "../../" climbs TWO directory levels from the current working directory
# (assumes the notebook sits two levels below the repo root - TODO confirm).
# The guard makes this cell safe to re-run: an unguarded os.chdir("../../") would
# climb two more levels on every execution and break every relative data path below.
if "_REPO_ROOT" not in globals():
    os.chdir("../../")
    _REPO_ROOT = os.getcwd()
path = _REPO_ROOT
print(path)

/Users/vivaanwadhwa/Documents/GitHub/CFFS_sharon_2024


****

## Load Data Files

### Set Data File Path

In [7]:
# Gather every "*.oc" export for OK, Gather and Totem. For each of them the
# data/raw folder is searched first, followed by the matching data/archive folders.
repo_root = os.getcwd()
oc_patterns = [
    ("raw", "OK 23-24 Sep-Dec*"),          # OK
    ("archive", "OK", "OK *"),
    ("raw", "Gather 23-24 Sep-Dec*"),      # Gather
    ("archive", "Gather", "Gather *"),
    ("raw", "Totem 23-24 Sep-Dec*"),       # Totem
    ("archive", "Feast", "Feast *"),
]

filepath_list = []
for parts in oc_patterns:
    # Each pattern expands to <cwd>/data/<parts...>/*.oc
    filepath_list.extend(glob.glob(os.path.join(repo_root, "data", *parts, "*.oc")))

filepath_list

['/Users/vivaanwadhwa/Documents/GitHub/CFFS_sharon_2024/data/raw/OK 23-24 Sep-Dec/OK_ALL_prods_July29_2023.oc',
 '/Users/vivaanwadhwa/Documents/GitHub/CFFS_sharon_2024/data/archive/OK/OK 22-23 Jan-Apr/IPR_Export_GRILL_PM.oc',
 '/Users/vivaanwadhwa/Documents/GitHub/CFFS_sharon_2024/data/archive/OK/OK 22-23 Jan-Apr/IPR_Export_SQUARE.oc',
 '/Users/vivaanwadhwa/Documents/GitHub/CFFS_sharon_2024/data/archive/OK/OK 22-23 Jan-Apr/IPR_Export_GRILL_AM.oc',
 '/Users/vivaanwadhwa/Documents/GitHub/CFFS_sharon_2024/data/archive/OK/OK 22-23 Jan-Apr/IPR_Export_FORNO.oc',
 '/Users/vivaanwadhwa/Documents/GitHub/CFFS_sharon_2024/data/archive/OK/OK 22-23 Jan-Apr/IPR_Export_VEG_PM.oc',
 '/Users/vivaanwadhwa/Documents/GitHub/CFFS_sharon_2024/data/archive/OK/OK 22-23 Jan-Apr/IPR_Export_Missing_items_1.oc',
 '/Users/vivaanwadhwa/Documents/GitHub/CFFS_sharon_2024/data/archive/OK/OK 22-23 Jan-Apr/IPR_Export_VEG_AM.oc',
 '/Users/vivaanwadhwa/Documents/GitHub/CFFS_sharon_2024/data/archive/OK/OK 22-23 Jan-Apr/IPR

In [33]:
# Read the items.xml file inside every path of filepath_list and build one dataframe.
# Child elements copied from each <Item>, in output column order (after ItemId).
ITEM_FIELDS = ['Description', 'CaseQty', 'CaseUOM', 'PakQty', 'PakUOM', 'InventoryGroup']


def load_items(oc_paths):
    """Build a dataframe of the <Item> elements found under the given paths.

    Parameters
    ----------
    oc_paths : iterable of str
        Paths expected to contain an ``items.xml`` file. Paths without one
        are skipped silently.

    Returns
    -------
    pandas.DataFrame
        Columns ``ItemId`` plus ITEM_FIELDS, one row per distinct item record,
        with a fresh 0..n-1 index. Values are the raw XML text (strings), or
        None where a child element is absent.

    Raises
    ------
    KeyError
        If an <Item> element has no ``id`` attribute.
    """
    records = []
    for oc_path in oc_paths:
        # Local name on purpose: the module-level `path` is left untouched.
        items_xml = os.path.join(oc_path, 'items.xml')
        if not os.path.isfile(items_xml):
            continue
        for item in et.parse(items_xml).iterfind('Item'):
            record = {'ItemId': item.attrib['id']}
            for field in ITEM_FIELDS:
                record[field] = item.findtext(field)
            records.append(record)

    # Passing `columns` keeps the schema stable even when no item was found.
    return (
        pd.DataFrame(records, columns=['ItemId'] + ITEM_FIELDS)
        .drop_duplicates()
        .reset_index(drop=True)
    )


Items = load_items(filepath_list)

In [35]:
# Show the assembled Items dataframe as this cell's output.
Items

Unnamed: 0,ItemId,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup
0,I-7631,5 SPICE POWDER,1.000,ea,1.000,lb,SPICES
1,I-2353,"9""X6"" 2-COMPARTMENT CLAMSHELL",200.000,CT,1.000,CT,DISPOSABLES
2,I-43239,AGAR AGAR POWDER,500.000,g,1.000,g,SPICES
3,I-57481,AHA WATER SP BLUEB POM PET,24.000,each,1.000,each,BEVERAGE
4,I-57484,AHA WATER SP PEACH HONEY PET,24.000,each,1.000,each,BEVERAGE
...,...,...,...,...,...,...,...
2783,I-3581,PEPPER LEMON TFC,12.000,ea,825.000,g,SPICES
2784,I-54355,SAUCE HOISIN,1.000,5LBS,5.000,lb,FOOD - GROCERY
2785,I-19561,EPAZOTE,1000.000,g,1.000,Kg,PRODUCE
2786,I-2208,MILK BUTTERMILK 3.25%,1.000,L,1.000,L,DAIRY


In [36]:
# Locate the conversions CSV below the current working directory.
# glob returns a list, so `filepath` holds the matching path(s), not a bare string.
conv_csv_pattern = os.path.join(os.getcwd(), "data", "cleaning", "update", "Conv_UpdateConv.csv")
filepath = glob.glob(conv_csv_pattern)
filepath

['/Users/vivaanwadhwa/Documents/GitHub/CFFS_sharon_2024/data/cleaning/update/Conv_UpdateConv.csv']

In [37]:
# Load the conversions table from the first path stored in `filepath`.
# `filepath` is a glob result and is empty when the CSV does not exist, so fail
# with an explicit message instead of an opaque IndexError on filepath[0].
if not filepath:
    raise FileNotFoundError(
        "Conv_UpdateConv.csv was not found under data/cleaning/update; "
        "check the working directory and the glob pattern."
    )
Conversions = pd.read_csv(filepath[0])
Conversions

Unnamed: 0,ConversionId,Multiplier,ConvertFromQty,ConvertFromUom,ConvertToQty,ConvertToUom
0,I-67659,0.008818,1.0,each,113.398,g
1,I-28697,0.005181,1.0,ea,193.000,g
2,I-47441,0.005181,1.0,ea,193.000,g
3,I-1905,0.012500,1.0,CT,80.000,g
4,I-47440,0.005051,1.0,ea,198.000,g
...,...,...,...,...,...,...
436,I-61562,0.010000,1.0,BAR,100.000,ml
437,I-34478,0.010000,1.0,BAR,100.000,ml
438,I-25211,0.010000,1.0,BAR,100.000,ml
439,I-3823,0.009009,1.0,BAR,111.000,ml


In [39]:
# Attach each item's Description to the conversion rows whose ConversionId equals
# the item's ItemId.
# One vectorised lookup replaces the previous iterrows() loop, which rebuilt a
# boolean mask over all of Conversions for every single item.
# keep='last' mirrors the loop's behaviour when an ItemId occurs more than once:
# the later row's description wins.
description_by_item = (
    Items.drop_duplicates(subset='ItemId', keep='last')
    .set_index('ItemId')['Description']
)
# The column is rebuilt from scratch, so it always exists (NaN where no item
# matches), even when Items is empty, and re-running the cell is idempotent.
Conversions['Description'] = Conversions['ConversionId'].map(description_by_item)
Conversions

Unnamed: 0,ConversionId,Multiplier,ConvertFromQty,ConvertFromUom,ConvertToQty,ConvertToUom,Description
0,I-67659,0.008818,1.0,each,113.398,g,BURGER 50/50 4oz
1,I-28697,0.005181,1.0,ea,193.000,g,BURGER QUINOA YAM
2,I-47441,0.005181,1.0,ea,193.000,g,JAMAICAN PATTY SPICY
3,I-1905,0.012500,1.0,CT,80.000,g,BEEF JERKY PEPPERED 80G
4,I-47440,0.005051,1.0,ea,198.000,g,JAMAICAN PATTY MILD
...,...,...,...,...,...,...,...
436,I-61562,0.010000,1.0,BAR,100.000,ml,
437,I-34478,0.010000,1.0,BAR,100.000,ml,MAGNUM DOUBLE RASPBERRY
438,I-25211,0.010000,1.0,BAR,100.000,ml,
439,I-3823,0.009009,1.0,BAR,111.000,ml,KLONDIKE CONE CHOC/VAN KING


In [40]:
# Conversions whose Description is still missing, i.e. no item supplied one.
missing_description = Conversions["Description"].isna()
Conversions[missing_description]

Unnamed: 0,ConversionId,Multiplier,ConvertFromQty,ConvertFromUom,ConvertToQty,ConvertToUom,Description
129,I-31464,0.000926,1.0,ea,1079.55,g,
130,I-63692,0.001333,1.0,ea,750.00,g,
134,I-10503,1.000000,192.0,g,192.00,g,
136,I-33072,0.006667,1.0,ea,150.00,g,
137,I-70004,1.000000,2210.0,g,2210.00,g,
...,...,...,...,...,...,...,...
421,I-65519,0.014286,1.0,each,70.00,ml,
422,I-42798,0.016667,1.0,BAR,60.00,ml,
427,I-65518,0.014286,1.0,each,70.00,ml,
436,I-61562,0.010000,1.0,BAR,100.00,ml,
