# Climate-Friendly Food Systems (CFFS) Labelling Project

### The University of British Columbia

****

## Assigning Categories

### Imports

In [1]:
import numpy as np
import pandas as pd
import os
import datetime

In [2]:
# RUN ONLY ONCE
# os.chdir is used to change the current directory to the specified path
os.chdir("../../") # Sets path to the repo folder as it is one level above where this file exists!
path = os.getcwd()
print(path)

/Users/vivaanwadhwa/Documents/GitHub/CFFS_sharon_2024


In [3]:
consolidated_df = pd.read_csv(os.path.join(os.getcwd(), "data", "mapping", "AMS_data", "new items","2024-07-09_New_Items.csv"))
consolidated_df.head()

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup
0,I-4105,,SPICE GARLIC GRANULATED,5.0,g,1.25,L,Y
1,I-4106,,SPICE Pepper Black Grnd,10.0,g,1.25,L,Y
2,I-4107,,SPICE Nutmeg ground,2.0,g,2250.0,ml,Y
3,I-4108,,GRAVY MIX TRIO BROWN 1 STEP,490.0,g,4.5,L,Y
4,I-4109,,ORANGES 5lb,0.5,ea,1.0,ea,Y


In [4]:
# # Sort the dataframe by 'item_num' and 'qty'
# consolidated_df.sort_values(by=['item_num', 'qty'], inplace=True)

# # Find the rows to keep
# rows_to_keep = []
# prev_item_num = None

# for index, row in consolidated_df.iterrows():
#     if row['item_num'] != prev_item_num:
#         rows_to_keep.append(index)
#         prev_item_num = row['item_num']
#     elif row['qty'] < 0:
#         rows_to_keep.pop()
#         rows_to_keep.append(index)

# # Filter the dataframe to keep the selected rows
# unique_rows = consolidated_df.loc[rows_to_keep]

# # Display the unique rows
# unique_rows



In [5]:
# unique_rows[unique_rows['item_num'] == 69025]

In [6]:
categories_df = pd.read_csv("data/Misc/GHG_categories/ghge_factors.csv")
categories_df


Unnamed: 0,Category ID,Food Category,Active Total Supply Chain Emissions (kg CO2 / kg food)
0,1,beef & buffalo meat,41.3463
1,2,lamb/mutton & goat meat,41.6211
2,3,pork (pig meat),9.8315
3,4,"poultry (chicken, turkey)",4.3996
4,5,butter,11.4316
...,...,...,...
58,59,manually adjusted,0.0000
59,60,human labor,0.0000
60,61,kitchen supplies,0.0000
61,62,mushrooms,1.5000


In [7]:
consolidated_df.head()

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup
0,I-4105,,SPICE GARLIC GRANULATED,5.0,g,1.25,L,Y
1,I-4106,,SPICE Pepper Black Grnd,10.0,g,1.25,L,Y
2,I-4107,,SPICE Nutmeg ground,2.0,g,2250.0,ml,Y
3,I-4108,,GRAVY MIX TRIO BROWN 1 STEP,490.0,g,4.5,L,Y
4,I-4109,,ORANGES 5lb,0.5,ea,1.0,ea,Y


In [8]:
# Convert the "item_descrip" and "Food Category" columns to lowercase
consolidated_df['item_descrip'] = consolidated_df['Description'].str.lower()
categories_df['Food Category'] = categories_df['Food Category'].str.lower()

# Initialize new columns
consolidated_df['Food Category'] = np.nan

# Define a function to split the Food Category column
# def split_food_category(category):
#     separators = ['/', '(', ',', ')', '|']
#     for separator in separators:
#         category = category.replace(separator, ' ')
#     return category.strip()

def split_food_category(category):
    separators = ['/', '(', ',', ')', '|']
    for separator in separators:
        category = category.replace(separator, ' ')
    return category.strip()

for index, row in categories_df.iterrows():
    categories_df.at[index, 'Food Category'] = split_food_category(row['Food Category'])
    
for index, row in consolidated_df.iterrows():
    consolidated_df.at[index, 'item_descrip'] = split_food_category(row['item_descrip'])
    
    
# Iterate through each row
for index, row in consolidated_df.iterrows():
    item_descrip = row['item_descrip']
    
    # Split the item_descrip into words
    item_words = item_descrip.split()
    
    # Find matches between the words in item_descrip and the Food Category column
    matches = categories_df[categories_df['Food Category'].apply(lambda x: any(word in x.split() for word in item_words))]
    
    # If there is a match, assign the corresponding values
    if not matches.empty:
        consolidated_df.at[index, 'Food Category'] = matches['Food Category'].iloc[0]
        consolidated_df.at[index, 'CategoryID'] = matches['Category ID'].iloc[0]

# Print the updated DataFrame
consolidated_df





  consolidated_df.at[index, 'Food Category'] = matches['Food Category'].iloc[0]


Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,item_descrip,Food Category
0,I-4105,,SPICE GARLIC GRANULATED,5.0,g,1.25,L,Y,spice garlic granulated,
1,I-4106,,SPICE Pepper Black Grnd,10.0,g,1.25,L,Y,spice pepper black grnd,
2,I-4107,,SPICE Nutmeg ground,2.0,g,2250.00,ml,Y,spice nutmeg ground,
3,I-4108,,GRAVY MIX TRIO BROWN 1 STEP,490.0,g,4.50,L,Y,gravy mix trio brown 1 step,
4,I-4109,,ORANGES 5lb,0.5,ea,1.00,ea,Y,oranges 5lb,
...,...,...,...,...,...,...,...,...,...,...
65,I-4233,,SPICE SUMAC ground,60.0,g,1177.00,Kg,Y,spice sumac ground,
66,I-4234,,Cucumber 3ct,3.0,HEAD,4.00,Kg,Y,cucumber 3ct,
67,I-4235,,Spice Dill Weed,10.0,g,4.00,Kg,Y,spice dill weed,
68,I-4236,,Egg Med packed by dozen,15.0,ea,55.00,ea,Y,egg med packed by dozen,


In [9]:
# for index, row in categories_df.iterrows():
#     for i, r in consolidated_df.iterrows():
#         if consolidated_df.loc[i, 'Food Category'].str.contains(categories_df.loc[index, 'Food Category']):
#             consolidated_df.loc[index, 'Food Category'] = categories_df.loc[index, 'Food Category']
#             consolidated_df.loc[index, 'Category ID'] = categories_df.loc[index, 'Category ID']

In [10]:
# def split_food_category(category):
#     separators = ['/', '(', ',', ')']
#     for separator in separators:
#         category = category.replace(separator, ' ')
#     return category.strip()

# for index, row in categories_df.iterrows():
#     categories_df.at[index, 'Food Category'] = split_food_category(row['Food Category'])

a_df = consolidated_df.iloc[730:760, :]


In [11]:
# Iterate through each row
from re import I


for index, row in consolidated_df.iterrows():
    item_descrip = row['item_descrip']
    
    # Split the item_descrip into words
    item_words = item_descrip.split()
    
    if 'kale' in item_words or 'lettuce' in item_words or 'romaine' in item_words or 'salad' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'other vegetables'
        consolidated_df.at[index, 'CategoryID'] = 40
        
    elif 'base' in item_words and 'veg' in item_words or 'veg' in item_words or 'seaweed' in item_words or 'ck|g&g|salad|kalecaesarw/chick' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'other vegetables'
        consolidated_df.at[index, 'CategoryID'] = 40
        
    elif 'nori' in item_words or 'ck|g&g|salad|kale caesar.' in item_words or 'pickle' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'other vegetables'
        consolidated_df.at[index, 'CategoryID'] = 40

        
    elif 'spinach' in item_words or 'artichoke' in item_words or 'avocado' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'other vegetables'
        consolidated_df.at[index, 'CategoryID'] = 40
        
    elif 'cantaloupe' in item_words or 'papaya' in item_words or 'eggplant' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'other fruits'
        consolidated_df.at[index, 'CategoryID'] = 31
    
    elif 'celery' in item_words or 'coleslaw' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'other vegetables'
        consolidated_df.at[index, 'CategoryID'] = 40
    
    elif 'sprouts' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'other vegetables'
        consolidated_df.at[index, 'CategoryID'] = 40
    
    elif 'milkettes' in item_words or ('milk' in item_words and '2%' in item_words) or 'milk' in item_words:
        consolidated_df.at[index, 'Food Category'] = "milk (cow's milk)"
        consolidated_df.at[index, 'CategoryID'] = 9
    
    
    elif ('milk' in item_words and 'rice' in item_words):
        consolidated_df.at[index, 'Food Category'] = "rice milk"
        consolidated_df.at[index, 'CategoryID'] = 29

            
    elif 'oat' in item_words or 'oatmeal' in item_words or 'oats' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'oats (oatmeal)'
        consolidated_df.at[index, 'CategoryID'] = 23

    elif 'dress' in item_words or 'sauce' in item_words or 'jelly' in item_words or 'ketchup' in item_words or 'chutney' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'sauces & paste'
        consolidated_df.at[index, 'CategoryID'] = 58

    elif 'mayonaise*vegan' in item_words or 'mayonnaise' in item_words or 'mayo' in item_words or 'mustard' in item_words or 'spread' in item_words or 'paste' in item_words or 'dressing' in item_words or 'aioli' in item_words or 'marinade' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'sauces & paste'
        consolidated_df.at[index, 'CategoryID'] = 58
        
        

    # Check if item_descrip contains "cereal"
    elif 'cereal' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'other grains/cereals'
        consolidated_df.at[index, 'CategoryID'] = 21
              

    elif 'coconut' in item_words or 'apricot' in item_words or 'cucumber' in item_words or 'peppers' in item_words or 'salsa' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'other fruits'
        consolidated_df.at[index, 'CategoryID'] = 31 

    elif 'grapes' in item_words or 'kiwi' in item_words or 'mangoes' in item_words or 'honeydew' in item_words or 'peach' in item_words or 'pears' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'other fruits'
        consolidated_df.at[index, 'CategoryID'] = 31 
    
    elif 'squash' in item_words or 'watermelon' in item_words or 'zucchini' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'other fruits'
        consolidated_df.at[index, 'CategoryID'] = 31 
        
    elif 'muffin' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'wheat/rye (bread, pasta, baked goods)'
        consolidated_df.at[index, 'CategoryID'] = 24     
  
        
        
    elif 'pie' in item_words or 'waffle' in item_words or 'pastry' in item_words or 'flour' in item_words or 'bun' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'wheat/rye (bread, pasta, baked goods)'
        consolidated_df.at[index, 'CategoryID'] = 24
    
    elif 'bar' in item_words or 'biscuits' in item_words or 'brownie' in item_words or 'cake' in item_words or 'cookie' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'wheat/rye (bread, pasta, baked goods)'
        consolidated_df.at[index, 'CategoryID'] = 24
        
    
    elif 'loaf' in item_words or 'pretzel' in item_words or 'scone' in item_words or 'tart' in item_words or 'tiramisu' in item_words or 'lady' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'wheat/rye (bread, pasta, baked goods)'
        consolidated_df.at[index, 'CategoryID'] = 24

    elif 'crouton' in item_words or 'ck|prep|granola' in item_words or 'noodles' in item_words or 'perogy' in item_words or 'pasta' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'wheat/rye (bread, pasta, baked goods)'
        consolidated_df.at[index, 'CategoryID'] = 24
    
               
#     if 'co2' in item_words or 'soda' in item_words or 'baking' in item_words:
#         consolidated_df.at[index, 'Food Category'] = 'kitchen supplies'
#         consolidated_df.at[index, 'Category ID'] = 61
            
        
    elif 'hashbrown' in item_words or 'potato*pompom' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'potatoes'
        consolidated_df.at[index, 'CategoryID'] = 41
    
    elif 'dragon' in item_words or 'california' in item_words or 'shrimpmt' in item_words or 'prawn' in item_words or 'shrimp' in item_words or 'prawns' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'crustaceans (shrimp/prawns)'
        consolidated_df.at[index, 'CategoryID'] = 13


    elif 'cumin' in item_words or 'cloves' in item_words or 'ground' in item_words or 'seasoning' in item_words:
        consolidated_df.at[index, 'Food Category'] = "stimulants & spices misc."
        consolidated_df.at[index, 'CategoryID'] = 54
        
    elif 'blend' in item_words or 'chili' in item_words or 'pwdr' in item_words or 'caper' in item_words or 'yeast' in item_words:
        consolidated_df.at[index, 'Food Category'] = "stimulants & spices misc."
        consolidated_df.at[index, 'CategoryID'] = 54
    
    elif 'leaf' in item_words or 'chile' in item_words or 'paprika' in item_words or 'leaves' in item_words:
        consolidated_df.at[index, 'Food Category'] = "stimulants & spices misc."
        consolidated_df.at[index, 'CategoryID'] = 54   
        
    elif 'anise' in item_words or 'cardamom' in item_words or 'mix' in item_words or 'wasabi' in item_words:
        consolidated_df.at[index, 'Food Category'] = "stimulants & spices misc."
        consolidated_df.at[index, 'CategoryID'] = 54 
        
        
    elif 'onion' in item_words or 'leeks' in item_words or 'onions' in item_words or 'shallots' in item_words or ('vanilla' in item_words and 'extract' in item_words):
        consolidated_df.at[index, 'Food Category'] = 'onions and leeks'
        consolidated_df.at[index, 'CategoryID'] = 39
                
    
    elif 'pumpkin' in item_words or 'jackfruit' in item_words or 'pineapple' in item_words or 'raisin' in item_words or 'cherries' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'other fruits'
        consolidated_df.at[index, 'CategoryID'] = 31 
        
    elif 'pumpkin' in item_words and 'kern' in item_words or 'quinoa' in item_words or 'sesame' in item_words or 'seed' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'tree nuts and seeds'
        consolidated_df.at[index, 'CategoryID'] = 26 
        

    elif 'tomato' in item_words or 'tomatillos' in item_words or 'tomatoes' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'tomatoes'
        consolidated_df.at[index, 'CategoryID'] = 37
        
    elif 'soybean' in item_words or 'tofu' in item_words or 'tempeh' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'soybeans/tofu'
        consolidated_df.at[index, 'CategoryID'] = 20
    
    elif 'salt&vin' in item_words or 'balsamic' in item_words or 'vinegar' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'vinegar'
        consolidated_df.at[index, 'CategoryID'] = 57
    
    elif 'sake' in item_words or "wine" in item_words:
        consolidated_df.at[index, 'Food Category'] = 'wine grapes (wine)'
        consolidated_df.at[index, 'CategoryID'] = 51
    
    elif 'lentils' in item_words or 'ck|g&g|salad|medchickpea' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'other legumes'
        consolidated_df.at[index, 'CategoryID'] = 16
    
    elif 'chickpea.' in item_words or 'falafel' in item_words or ('patties' in item_words and 'vegan' in item_words):
        consolidated_df.at[index, 'Food Category'] = 'other legumes'
        consolidated_df.at[index, 'CategoryID'] = 16
    
    
    elif 'tuna' in item_words or 'squid' in item_words or 'cod' in item_words or 'halibut' in item_words or 'salmon' in item_words or 'sal' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'fish (finfish)'
        consolidated_df.at[index, 'CategoryID'] = 12
    
    elif 'raspberry' in item_words or 'blackberry' in item_words or 'blueberry' in item_words or 'strawberry' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'berries'
        consolidated_df.at[index, 'CategoryID'] = 34
        
    elif 'raspberries' in item_words or 'blackberries' in item_words or 'blueberries' in item_words or 'strawberries' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'berries'
        consolidated_df.at[index, 'CategoryID'] = 34
    
    elif 'samosa' in item_words or 'wrap' in item_words or 'sandwich' in item_words or 'soup' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'manually adjusted'
        consolidated_df.at[index, 'CategoryID'] = 59
        
    elif 'coconut' in item_words and 'milk' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'manually adjusted'
        consolidated_df.at[index, 'CategoryID'] = 59
 
    elif 'mandarin' in item_words or 'grapefruit' in item_words or 'lemons' in item_words or 'limes' in item_words or 'mandarins' in item_words or 'oranges' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'citrus fruit'
        consolidated_df.at[index, 'CategoryID'] = 35
        
    elif 'fruit' in item_words or 'fruit.' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'citrus fruit'
        consolidated_df.at[index, 'CategoryID'] = 35
    
    elif 'olive' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'olives (oil)'
        consolidated_df.at[index, 'CategoryID'] = 49
    
    elif 'beets' in item_words or 'dill' in item_words or 'ginger' in item_words or 'garlic' in item_words or 'jicama' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'root vegetables'
        consolidated_df.at[index, 'CategoryID'] = 38
   
    elif 'parsnip' in item_words or 'carrot' in item_words or 'carrots' in item_words or 'turnip' in item_words or 'tapioca' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'root vegetables'
        consolidated_df.at[index, 'CategoryID'] = 38
        
        
    elif 'cilantro' in item_words or 'rosemary' in item_words or 'sage' in item_words or 'thyme' in item_words or 'herbs' in item_words or 'herb' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'herbs'
        consolidated_df.at[index, 'CategoryID'] = 63    
        
    elif 'pea' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'peas'
        consolidated_df.at[index, 'CategoryID'] = 18 
    
    elif 'yogurt' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'yogurt'
        consolidated_df.at[index, 'CategoryID'] = 10
        
    
    elif 'crepe' in item_words or 'croissant' in item_words or 'danish' in item_words or 'donut' in item_words or 'eclair' in item_words or 'bread' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'wheat/rye (bread, pasta, baked goods)'
        consolidated_df.at[index, 'CategoryID'] = 24
    
        # Check if prod_group_descrip is "SUPPLY"
#     if row['prod_group_descrip'] == 'SUPPLY':
#         consolidated_df.at[index, 'Food Category'] = 'kitchen supplies'
#         consolidated_df.at[index, 'Category ID'] = 61
        
#     if row['prod_group_descrip'] == 'DISPOSABLES':
#         consolidated_df.at[index, 'Food Category'] = 'kitchen supplies'
#         consolidated_df.at[index, 'Category ID'] = 61

#     if row['prod_group_descrip'] == 'CLEANING':
#         consolidated_df.at[index, 'Food Category'] = 'kitchen supplies'
#         consolidated_df.at[index, 'Category ID'] = 61
        
    elif row['InventoryGroup'] == 'BREAD':
        consolidated_df.at[index, 'Food Category'] = 'wheat/rye (bread, pasta, baked goods)'
        consolidated_df.at[index, 'CategoryID'] = 24   
    
    elif row['InventoryGroup'] == 'BAKED GOODS':
        consolidated_df.at[index, 'Food Category'] = 'wheat/rye (bread, pasta, baked goods)'
        consolidated_df.at[index, 'CategoryID'] = 24
        
    elif row['InventoryGroup'] == 'CANDIES SNACKS':
        consolidated_df.at[index, 'Food Category'] = 'sugars and sweeteners'
        consolidated_df.at[index, 'CategoryID'] = 43
    
    elif row['InventoryGroup'] == 'BEVERAGE' and pd.isna(row['Food Category']):
        consolidated_df.at[index, 'Food Category'] = 'water & beverages'
        consolidated_df.at[index, 'CategoryID'] = 55
    
    elif 'pringles' in item_words or 'pringle' in item_words or 'potato' in item_words or 'fries' in item_words or 'chip' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'potatoes'
        consolidated_df.at[index, 'CategoryID'] = 41
    
    elif ('cream' in item_words and 'ice' in item_words) or 'magnum' in item_words or ('mint' in item_words and 'chip' in item_words):
        consolidated_df.at[index, 'Food Category'] = 'ice cream'
        consolidated_df.at[index, 'CategoryID'] = 7
    
    elif ('base' in item_words and 'chick' in item_words) or ('thigh' in item_words) or ('wings' in item_words) or 'chicken' in item_words or 'turkey' in item_words or 'chick' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'poultry (chicken, turkey)'
        consolidated_df.at[index, 'CategoryID'] = 4
    
        # Check if item_descrip contains "proscuitto" or "ham"
    elif 'prosciutto' in item_words or 'ham' in item_words or 'bacon' in item_words or 'chorizo' in item_words or 'sausage' in item_words or 'pork' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'pork (pig meat)'
        consolidated_df.at[index, 'CategoryID'] = 3

    elif 'juice' in item_words or 'pop' in item_words or 'coke' in item_words or 'ale' in item_words or 'inst' in item_words or 'tea' in item_words or 'bev' in item_words or 'punch' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'water & beverages'
        consolidated_df.at[index, 'CategoryID'] = 55 
        
    elif 'water' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'water & beverages'
        consolidated_df.at[index, 'CategoryID'] = 55 
 
    elif 'rubbed' in item_words or 'pepper' in item_words or 'powder' in item_words or 'masala' in item_words:
        consolidated_df.at[index, 'Food Category'] = "stimulants & spices misc."
        consolidated_df.at[index, 'CategoryID'] = 54
        
    elif row['InventoryGroup'] == 'SUPPLY':
        consolidated_df.at[index, 'Food Category'] = 'kitchen supplies'
        consolidated_df.at[index, 'CategoryID'] = 61
        
    elif row['InventoryGroup'] == 'DISPOSABLES':
        consolidated_df.at[index, 'Food Category'] = 'kitchen supplies'
        consolidated_df.at[index, 'CategoryID'] = 61

    elif row['InventoryGroup'] == 'CLEANING':
        consolidated_df.at[index, 'Food Category'] = 'kitchen supplies'
        consolidated_df.at[index, 'CategoryID'] = 61
        
    
    elif 'beer' in item_words:
        consolidated_df.at[index, 'Food Category'] = "barley (beer)"
        consolidated_df.at[index, 'CategoryID'] = 50
    
    elif 'sugar' in item_words or 'melona' in item_words or 'honey' in item_words or 'syrup' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'sugars and sweeteners'
        consolidated_df.at[index, 'CategoryID'] = 43 
    
    elif 'salt' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'salt'
        consolidated_df.at[index, 'CategoryID'] = 56
        
    elif 'butter' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'butter'
        consolidated_df.at[index, 'CategoryID'] = 5
          
        
    elif ('cream' in item_words or 'creamo10%' in item_words) and not 'ice' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'cream'
        consolidated_df.at[index, 'CategoryID'] = 8  
        
    elif 'paneer' in item_words or 'cheese' in item_words or 'bocconcini' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'cheese'
        consolidated_df.at[index, 'CategoryID'] = 6
    
    
    elif 'egg' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'eggs'
        consolidated_df.at[index, 'CategoryID'] = 11
        
    
    elif ('milk' in item_words and 'go' in item_words) or ('milk' in item_words and '2%' in item_words) or ('milk' in item_words and 'choc' in item_words):
        consolidated_df.at[index, 'Food Category'] = "milk (cow's milk)"
        consolidated_df.at[index, 'CategoryID'] = 9
        
    elif 'milk' in item_words and 'homo' in item_words:
        consolidated_df.at[index, 'Food Category'] = "milk (cow's milk)"
        consolidated_df.at[index, 'CategoryID'] = 9

    elif ('milk' in item_words and 'soy' in item_words):
        consolidated_df.at[index, 'Food Category'] = "soy milk"
        consolidated_df.at[index, 'CategoryID'] = 30
        
    elif ('milk' in item_words and 'oat' in item_words) or ('oat' in item_words and 'barista' in item_words):
        consolidated_df.at[index, 'Food Category'] = "oat milk"
        consolidated_df.at[index, 'CategoryID'] = 28
    
    elif 'banana' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'bananas'
        consolidated_df.at[index, 'CategoryID'] = 33
    
    elif 'mushrooms' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'mushrooms'
        consolidated_df.at[index, 'CategoryID'] = 62
        
    elif 'corn' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'corn (maize)'
        consolidated_df.at[index, 'CategoryID'] = 22
    
    elif 'lamb' in item_words or 'mutton' in item_words or 'goat' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'lamb/mutton & goat meat'
        consolidated_df.at[index, 'CategoryID'] = 2
        
        
    elif 'broccoli' in item_words or 'cabbage' in item_words or 'cauliflower' in item_words or 'kale' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'cabbages and other brassicas (broccoli)'
        consolidated_df.at[index, 'CategoryID'] = 36   

    elif 'daikon' in item_words or 'brussel' in item_words or 'choy' in item_words or 'broccolini' in item_words or 'gai' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'cabbages and other brassicas (broccoli)'
        consolidated_df.at[index, 'CategoryID'] = 36 

    elif 'co2' in item_words or 'soda' in item_words or 'baking' in item_words or 'sponge' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'kitchen supplies'
        consolidated_df.at[index, 'CategoryID'] = 61
    
    elif 'easter' in item_words or 'candy' in item_words or 'sweetener' in item_words or 'puddings' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'sugars and sweeteners'
        consolidated_df.at[index, 'CategoryID'] = 43
    elif 'pumpkin' in item_words or 'hemp' in item_words or 'nut' in item_words or 'nuts' in item_words or 'almond' in item_words or 'almonds' in item_words or 'seeds' in item_words or 'seed' in item_words or 'chia' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'tree nuts and seeds'
        consolidated_df.at[index, 'CategoryID'] = 26
    elif 'oil' in item_words or 'oils' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'other vegetable oils'
        consolidated_df.at[index, 'CategoryID'] = 44
    elif ('oil' in item_words and 'soybeans' in item_words) or ('oil' in item_words and 'soybean' in item_words) or ('oil' in item_words and 'soy' in item_words):
        consolidated_df.at[index, 'Food Category'] = 'soybeans (oil)'
        consolidated_df.at[index, 'CategoryID'] = 45
    elif 'oil' in item_words and 'palm' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'palm (oil)'
        consolidated_df.at[index, 'CategoryID'] = 46
    elif 'oil' in item_words and 'sunflower' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'sunflower (oil)'
        consolidated_df.at[index, 'CategoryID'] = 47

    elif ('oil' in item_words and 'rapeseed' in item_words) or ('oil' in item_words and 'canola' in item_words):
        consolidated_df.at[index, 'Food Category'] = 'rapeseed/canola (oil)'
        consolidated_df.at[index, 'CategoryID'] = 48
        
    elif 'bean' in item_words or 'beans' in item_words or 'pulses' in item_words or 'chana' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'beans and pulses (dried)'
        consolidated_df.at[index, 'CategoryID'] = 17
    
    
    elif 'chocolate' in item_words or 'arnotts' in item_words or 'choco' in item_words or 'cho' in item_words or 'cocoa' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'cocoa'
        consolidated_df.at[index, 'CategoryID'] = 52
        
    elif 'dough' in item_words or 'cracker' in item_words or 'linguini' in item_words or 'pierogi' in item_words or 'pita' in item_words or 'penne' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'wheat/rye (bread, pasta, baked goods)'
        consolidated_df.at[index, 'CategoryID'] = 24
    
    elif 'rice' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'rice'
        consolidated_df.at[index, 'CategoryID'] = 25
        
    elif 'apples' in item_words or 'apple' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'apples'
        consolidated_df.at[index, 'CategoryID'] = 32  
    elif ('ice' in item_words and 'cream' in item_words) or 'melona' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'ice cream'
        consolidated_df.at[index, 'CategoryID'] = 7
        # Check if item_descrip contains "salami"
    elif 'salami' in item_words or '50/50' in item_words or 'patties' in item_words or ('beef' in item_words and 'roast' in item_words) or 'beef' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'beef & buffalo meat'
        consolidated_df.at[index, 'CategoryID'] = 1
    elif 'ht' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'ice cream'
        consolidated_df.at[index, 'CategoryID'] = 7
    elif 'oyster' in item_words or 'oysters' in item_words or 'mussel' in item_words:
        consolidated_df.at[index, 'Food Category'] = 'mollusks'
        consolidated_df.at[index, 'CategoryID'] = 14
    ## Manual
    elif ("yellow" in item_words and "curry" in item_words) or "green" in item_words:
        consolidated_df.at[index, 'Food Category'] = 'sauces & paste'
        consolidated_df.at[index, 'CategoryID'] = 58
    else:
        consolidated_df.at[index, 'Food Category'] = 'manually adjusted'
        consolidated_df.at[index, 'CategoryID'] = 59
# consolidated_df
# 18386

#     elif 'chip' in item_words or 'potato' in item_words:
#         consolidated_df.at[index, 'Food Category'] = 'potatoes'
#         consolidated_df.at[index, 'Category ID'] = 41
# Print the updated DataFrame

In [12]:
# def split_food_category(category):
#     separators = ['/', '(', ',', ')']
#     for separator in separators:
#         category = category.replace(separator, ' ')
#     return category.strip()

# for index, row in categories_df.iterrows():
#     categories_df.at[index, 'Food Category'] = split_food_category(row['Food Category'])
    
# # Iterate through each row
# for index, row in consolidated_df.iterrows():
#     item_descrip = row['item_descrip']
    
#     # Check if "Food Category" is NaN
#     if pd.isna(row['Food Category']):
    
#         # Split the item_descrip into words
#         item_words = item_descrip.split()
        
#         # Find matches between the words in item_descrip and the Food Category column
#         matches = categories_df[categories_df['Food Category'].apply(lambda x: any(word in x.split() for word in item_words))]
        
#         # If there is a match, assign the corresponding values
#         if not matches.empty:
#             consolidated_df.at[index, 'Food Category'] = matches['Food Category'].iloc[0]
#             consolidated_df.at[index, 'Category ID'] = matches['Category ID'].iloc[0]

# # Print the updated DataFrame
# consolidated_df

In [13]:
filtered_df = consolidated_df[~consolidated_df["CategoryID"].isna()]
filtered_df

# filtered_df = a_df[~a_df["Category ID"].isna()]
# filtered_df


Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,item_descrip,Food Category
0,I-4105,38.0,SPICE GARLIC GRANULATED,5.0,g,1.25,L,Y,spice garlic granulated,root vegetables
1,I-4106,54.0,SPICE Pepper Black Grnd,10.0,g,1.25,L,Y,spice pepper black grnd,stimulants & spices misc.
2,I-4107,54.0,SPICE Nutmeg ground,2.0,g,2250.00,ml,Y,spice nutmeg ground,stimulants & spices misc.
3,I-4108,54.0,GRAVY MIX TRIO BROWN 1 STEP,490.0,g,4.50,L,Y,gravy mix trio brown 1 step,stimulants & spices misc.
4,I-4109,35.0,ORANGES 5lb,0.5,ea,1.00,ea,Y,oranges 5lb,citrus fruit
...,...,...,...,...,...,...,...,...,...,...
65,I-4233,54.0,SPICE SUMAC ground,60.0,g,1177.00,Kg,Y,spice sumac ground,stimulants & spices misc.
66,I-4234,31.0,Cucumber 3ct,3.0,HEAD,4.00,Kg,Y,cucumber 3ct,other fruits
67,I-4235,38.0,Spice Dill Weed,10.0,g,4.00,Kg,Y,spice dill weed,root vegetables
68,I-4236,11.0,Egg Med packed by dozen,15.0,ea,55.00,ea,Y,egg med packed by dozen,eggs


In [14]:
consolidated_df[(consolidated_df['CategoryID'] == 40.0) & (~consolidated_df['ItemId'].duplicated(keep=False))]

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,item_descrip,Food Category
29,I-4160,40.0,Cheese Cheddar Slcd Veg,1.0,g,1.0,PORT,Y,cheese cheddar slcd veg,other vegetables


In [15]:
unfiltered_df = consolidated_df[consolidated_df["CategoryID"].isna()]
unfiltered_df

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,item_descrip,Food Category


In [16]:
consolidated_df.drop(columns={'item_descrip'}, inplace = True)
consolidated_df

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category
0,I-4105,38.0,SPICE GARLIC GRANULATED,5.0,g,1.25,L,Y,root vegetables
1,I-4106,54.0,SPICE Pepper Black Grnd,10.0,g,1.25,L,Y,stimulants & spices misc.
2,I-4107,54.0,SPICE Nutmeg ground,2.0,g,2250.00,ml,Y,stimulants & spices misc.
3,I-4108,54.0,GRAVY MIX TRIO BROWN 1 STEP,490.0,g,4.50,L,Y,stimulants & spices misc.
4,I-4109,35.0,ORANGES 5lb,0.5,ea,1.00,ea,Y,citrus fruit
...,...,...,...,...,...,...,...,...,...
65,I-4233,54.0,SPICE SUMAC ground,60.0,g,1177.00,Kg,Y,stimulants & spices misc.
66,I-4234,31.0,Cucumber 3ct,3.0,HEAD,4.00,Kg,Y,other fruits
67,I-4235,38.0,Spice Dill Weed,10.0,g,4.00,Kg,Y,root vegetables
68,I-4236,11.0,Egg Med packed by dozen,15.0,ea,55.00,ea,Y,eggs


In [17]:
# CHANGE THE ADRESS OF THE EXCEL FILE
consolidated_df.to_excel("data/mapping/AMS_data/new items/" + str(datetime.date.today()) + "_New_Items_assigned.xlsx",index = None, header=True)

In [18]:
unfiltered_df = consolidated_df[consolidated_df["CategoryID"].isna()]
unfiltered_df.shape

(0, 9)

In [19]:
unfiltered_df

Unnamed: 0,ItemId,CategoryID,Description,CaseQty,CaseUOM,PakQty,PakUOM,InventoryGroup,Food Category
