# Ingredient Sanity Check

***

### Imports

In [1]:
import pandas as pd

In [2]:
ingredients = pd.read_csv("data/preprocessed/Ingredients_List.csv")
items = pd.read_csv("data/preprocessed/Items_List.csv")
preps = pd.read_csv("data/preprocessed/Preps_List.csv")
products = pd.read_csv("data/preprocessed/Products_List.csv")
mapping = pd.read_csv("data/mapping/Mapping.csv")
RESTAURANT_NAME = "TOTEM"

In [3]:
def get_ingredients(recipe_id):
    """
    Retrieve all ingredients and their quantities for a given recipe.
    This function takes a recipe ID and returns a DataFrame containing all the ingredients
    required for the recipe, along with their quantities. It handles nested ingredients
    by recursively fetching all related ingredients.
    Parameters:
    recipe_id (str): The ID of the recipe for which ingredients are to be retrieved.
    Returns:
    pd.DataFrame: A DataFrame containing the ingredients with their quantities.
    """
    recipe = ingredients[ingredients["Recipe"] == recipe_id]
    def get_all_ingredients(recipe):
        all_ingredients = pd.DataFrame()  # Initialize an empty DataFrame to store all ingredients

        for index, row in recipe.iterrows():
            temp = ingredients[ingredients["Recipe"] == row["IngredientId"]]
            if not temp.empty:
                all_ingredients = pd.concat([all_ingredients, temp, get_all_ingredients(temp)], ignore_index=True)
            all_ingredients = pd.concat([all_ingredients, temp], ignore_index=True)
        
        return pd.concat([recipe, all_ingredients], ignore_index=True)  # Return original recipe with all ingredients

    # Calling the function with the recipe DataFrame
    complete_recipe = get_all_ingredients(recipe)
    recipe_items = []
    recipe_preps = []

    # Seperate into Items and Preps
    items_list = complete_recipe[complete_recipe["IngredientId"].str.startswith("I")]
    items_list.drop_duplicates(subset="IngredientId", keep="first", inplace=True)
    preps_list = complete_recipe[complete_recipe["IngredientId"].str.startswith("P")]
    preps_list.drop_duplicates(subset="IngredientId", keep="first", inplace=True)

    for index,row in complete_recipe.iterrows():
        temp = items[items["ItemId"] == row["IngredientId"]]
        recipe_items.append(temp)
        temp2 = preps[preps["PrepId"] == row["IngredientId"]]
        recipe_preps.append(temp2)
    
    recipe_items = pd.concat(recipe_items, ignore_index=True)
    recipe_preps = pd.concat(recipe_preps, ignore_index=True)
    recipe_items.drop_duplicates(subset="ItemId", keep="first", inplace=True)
    recipe_preps.drop_duplicates(subset="PrepId", keep="first", inplace=True)
    assert len(recipe_items) == len(items_list)
    recipe_items_with_quants = pd.merge(recipe_items, items_list, left_on="ItemId", right_on="IngredientId")
    recipe_items_with_quants.drop(columns=["IngredientId","CaseQty","CaseUOM","PakQty","PakUOM", "InventoryGroup", "Conversion","InvFactor"], inplace=True)
    return recipe_items_with_quants

In [4]:
# Initialize a list to store all recipe and ingredient details
data = []

for _, row in products.iterrows():
    # Extract recipe details
    recipe_id = row['ProdId']
    recipe_name = row.get('Description', 'Unknown')
    
    # Get the ingredients for the recipe
    ingres = get_ingredients(row["ProdId"])
    
    for _, ingredient in ingres.iterrows():
        # Filter the mapping for the current ingredient
        item_mapping = mapping[mapping["ItemId"] == ingredient["ItemId"]]
        
        # Safely extract values from item_mapping
        category_id = item_mapping['CategoryID'].iloc[0] if not item_mapping.empty else "Unknown"
        food_category = item_mapping['Food Category_y'].iloc[0] if not item_mapping.empty else "Unknown"
        
        # Append a dictionary with all details to the data list
        data.append({
            "Recipe ID": recipe_id,
            "Recipe Name": recipe_name,
            "Ingredient": ingredient['Description'],
            "Quantity": ingredient['Qty'],
            "Unit": ingredient['Uom'],
            "Category ID": category_id,
            "Emission Category": food_category,
        })

preps_list = ["P-35417","P-74324","P-62546"]

for prep in preps_list:
    recipe_id = prep
    try:
        recipe_name = products[products["ProdId"] == prep]["Description"].iloc[0]
    except:
        continue

    display(preps[preps["PrepId"] == prep])
    
    # Get the ingredients for the recipe
    ingres = get_ingredients(recipe_id)
    
    for _, ingredient in ingres.iterrows():
        # Filter the mapping for the current ingredient
        item_mapping = mapping[mapping["ItemId"] == ingredient["ItemId"]]
        
        # Safely extract values from item_mapping
        category_id = item_mapping['CategoryID'].iloc[0] if not item_mapping.empty else "Unknown"
        food_category = item_mapping['Food Category_y'].iloc[0] if not item_mapping.empty else "Unknown"
        
        # Append a dictionary with all details to the data list
        data.append({
            "Recipe ID": recipe_id,
            "Recipe Name": recipe_name,
            "Ingredient": ingredient['Description'],
            "Quantity": ingredient['Qty'],
            "Unit": ingredient['Uom'],
            "Category ID": category_id,
            "Emission Category": food_category,
        })


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  items_list.drop_duplicates(subset="IngredientId", keep="first", inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  preps_list.drop_duplicates(subset="IngredientId", keep="first", inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  items_list.drop_duplicates(subset="IngredientId", keep="first", inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs

In [5]:
# Create a DataFrame from the collected data
df = pd.DataFrame(data)

Still have to consider that some items will only have 1 item in the recipe because sometimes the item is the product itself

In [14]:
## MANUAL CHECK
# A way to check for items that are not made in house
df[df["Recipe ID"].map(df["Recipe ID"].value_counts()) == 1]

Unnamed: 0,Recipe ID,Recipe Name,Ingredient,Quantity,Unit,Category ID,Emission Category
4,R-44853,ADD|Poached Egg,EGG FREE*RUN LOOSE PACK,1.0,CT,11.0,eggs
5,R-52741,BAKE|Croissant|Plain,CROISSANT BUTTER*LARGE,1.0,ea,24.0,"wheat/rye (bread, pasta, baked goods)"
2103,R-24157,DNR|Add Bacon|2 pcs,BACON 2.5MM NATURALLY SMKD,57.0,g,3.0,pork (pig meat)
2426,R-73167,DNR|Sausage|Beyond Breakfast,BEYOND MEAT*SAUSAGE PATTY1.6oz,1.0,ea,20.0,soybeans/tofu
2427,R-27423,DNR|Sausage|Turkey Breakfast,SAUSAGE BREAKFAST*TURKEY,1.0,ea,4.0,"poultry (chicken, turkey)"
2969,R-64290,HC|Spicy Jamaican Patty,JAMAICAN PATTY SPICY,1.0,ea,1.0,beef & buffalo meat
3324,R-71591,HS|Focaccia Bread Sticks|SIDE,FOCACCIA HERB*SHEET,1.0,LOAF,24.0,"wheat/rye (bread, pasta, baked goods)"
4749,R-73508,SOUP|Goodly|Beef Barley Shroom,SOUP BEEF*BARLEY MUSHROOM,300.0,ml,1.0,beef & buffalo meat
4750,R-73213,SOUP|Goodly|Chicken Orzo,SOUP GOODLY*CHICKEN ORZO,300.0,ml,4.0,"poultry (chicken, turkey)"
4751,R-73507,SOUP|Goodly|Corn Chowder,SOUP GOODLY*CORN CHOWDER SPICY,300.0,ml,22.0,corn (maize)


In [None]:
# Save the DataFrame to a CSV file
df.to_csv(f"ingredients_{RESTAURANT_NAME}.csv", index=False)
print("Data successfully saved to ingredients_SAGECATERING.csv")