In [1]:
import os
import pickle
import pandas as pd

from calc_recipe_ingredient_info_distances import collectSomeRecipeRecommendations, get_all_comments, get_all_mutual_info, evalRecommendations, getRecommendationsBasedOnMutualInformationRole, get_graph_nodes, get_all_gt_recipes, get_recipes_per_ingredient, get_recipes_per_ingredient_pairs, get_all_frequencies,getNaiveBayesRecommendations

from recipe_clustering_helpers import (
    create_one_hot_ingredients_per_df, getRecipeIdsForSubTuples, get_hamming_distances
)

In [2]:
ORDERED_RECIPE_IDS_PATH = os.path.abspath("./outputs/sorted_recipe_ids_list.pkl")
TRAIN_COMMENTS_PATH = os.path.abspath("./inputs/train_comments_subs.pkl") # train recipes with substitutions
TEST_COMMENTS_PATH = os.path.abspath("./inputs/test_comments_subs.pkl") # test recipes with substitutions
VAL_COMMENTS_PATH = os.path.abspath("./inputs/val_comments_subs.pkl") # validation recipes with substitutions
GRAPH_NODES_PATH = os.path.abspath("./inputs/graph/nodes_191120.csv")

MUTUAL_INFO_DICT_PATH = os.path.abspath("./outputs/mutual_info_dict_with_self_info.pkl")
RECIPES_PER_INGREDIENT_SMALL_PATH = os.path.abspath(
    "./outputs/recipes_per_ingredient_small.pkl"
)
RECIPES_PER_INGREDIENT_PAIRS_SMALL_PATH = os.path.abspath(
    "./outputs/recipes_per_ingredient_pairs_small.pkl"
)
PROCESSED_RECIPES_PATH = os.path.abspath("./outputs/processed_recipes.pkl")
PATH_ONE_HOT_RECIPE_INGREDIENTS = os.path.abspath("./outputs/one_hot_recipe_ingredients.pkl")

In [3]:

if os.path.isfile(ORDERED_RECIPE_IDS_PATH):
    with open(ORDERED_RECIPE_IDS_PATH, "rb") as file:
        ordered_recipe_ids = pickle.load(file)
recipe_ids_with_ranks = ordered_recipe_ids
ordered_recipe_ids = [recipe[1] for recipe in recipe_ids_with_ranks]

# recipes with comments, lists, names, and gt_truths
extended_recipes = get_all_comments(TRAIN_COMMENTS_PATH, TEST_COMMENTS_PATH, VAL_COMMENTS_PATH)
recipes_extended_dict = {recipe["id"]:recipe for recipe in extended_recipes}


# recipe ingredient df one hot
# INGREDIENT_RECIPE_MATRIX_PATH = os.path.abspath("./outputs/ingredient_recipe_matrix.pkl")
# recipe_ingredient_df = get_recipe_ingredient_df( ingredients, recipes, recipes_per_ingredient, INGREDIENT_RECIPE_MATRIX_PATH)
# recipe_ingredient_df_bool = recipe_ingredient_df.astype(bool)

with open(MUTUAL_INFO_DICT_PATH, "rb") as mutual_info_dict_file:
            mutual_info_dict = pickle.load(mutual_info_dict_file)

ingredients = get_graph_nodes(GRAPH_NODES_PATH)

recipes = get_all_gt_recipes(TRAIN_COMMENTS_PATH, TEST_COMMENTS_PATH,
                                VAL_COMMENTS_PATH, PROCESSED_RECIPES_PATH)
recipes_per_ingredient = get_recipes_per_ingredient(
    ingredients, recipes, RECIPES_PER_INGREDIENT_SMALL_PATH)

recipes_per_ingredient_pairs = get_recipes_per_ingredient_pairs(
    recipes_per_ingredient, RECIPES_PER_INGREDIENT_PAIRS_SMALL_PATH)
recipe_ingredient_counts, recipe_ingredient_pair_counts = get_all_frequencies(recipes_per_ingredient,
                                        recipes_per_ingredient_pairs)


# Checking user comments GT substitution pairs

## Which GT tuple ingredients are not represented as nodes?

In [4]:
all_gt_substitution_tuples = [recipe['subs'] for recipe in extended_recipes]
all_gt_substitution_tuples = sorted(all_gt_substitution_tuples, key=lambda x: x[0])
all_gt_substitution_tuples
print(f"number of GT substitution tuples from user comments: {len(all_gt_substitution_tuples)}")
ingredients_that_are_not_nodes = []
for sub_tuple in all_gt_substitution_tuples:
    if sub_tuple[0] not in ingredients:
        ingredients_that_are_not_nodes.append(sub_tuple[0])
    if sub_tuple[1] not in ingredients:
        ingredients_that_are_not_nodes.append(sub_tuple[1])
print(f"number of ingredients that are not nodes: {len(ingredients_that_are_not_nodes)}")
print(f"list of ingredients that are not nodes: {list(set(ingredients_that_are_not_nodes))}")

number of GT substitution tuples from user comments: 70520
number of ingredients that are not nodes: 2583
list of ingredients that are not nodes: ['oregano_leaves', 'sugar_cookie', 'frozen_hash_browns', 'coarse_ground_mustard', 'vodka_sauce', 'saltines', 'pierogi', 'morton_tender_quick_salt', 'cracked_black_pepper', 'mint_oreo_cookie', 'soy_sausage', 'organic_virgin_coconut_oil', 'dark_chocolate_melts', 'unbleached_all_purpose_flour', 'mixed_glace_fruit', 'table_salt', 'sugar_free_applesauce', 'brownie_mix', 'graham_cracker_sticks', 'nestle_cream', 'salt_free_lemon_pepper', 'blackberry_preserves', 'chocolate_candy_melts', 'ground_pepper', 'yogurt_covered_raisins', 'chives', "mrs_dash's_extra_spicy_seasoning", 'crispbread', 'pecans', 'mushroom_stems', 'peach_baby_food', '98%_fat_free_condensed_cream_of_celery_soup', 'shredded_reduced_fat_cheddar_cheese', 'apricot_preserves', 'frozen_salad_shrimp', 'walnuts', 'sugar_free_fat_free_butterscotch_pudding', 'ground_red_pepper', 'wheat_gluten_

## How often do ground truth substitution tuples appear in recipes?

In [7]:
ground_truth_tuple_recipes = getRecipeIdsForSubTuples(extended_recipes)
ground_truth_tuple_counts = {gt_sub: len(recipe_list) for gt_sub, recipe_list in ground_truth_tuple_recipes.items()}
ground_truth_tuple_counts = dict(sorted(ground_truth_tuple_counts.items(), key=lambda item: item[1], reverse=True))

In [10]:
ground_truth_tuple_counts

#('ground_beef', 'ground_turkey'): 253,
#('vegetable_oil', 'applesauce'): 240,
#('heavy_cream', 'milk'): 219,
#('plain_yogurt', 'sour_cream'): 168,
#('butter', 'applesauce'): 133, # could be interesting because it will not always work
#('parsley', 'cilantro'): 132, # this too
#('salt', 'garlic_salt'): 115,

#('beef', 'boneless_skinless_chicken_breast'): 8,
#('beer', 'beef_broth'): 8,
#('frozen_spinach', 'broccoli'): 8,
#('red_wine', 'red_wine_vinegar'): 8,
#('lean_ground_beef', 'veggie_crumbles'): 8,
#('chicken', 'ground_turkey'): 2, # rare but should often fit, except the preparation could make it different?
#('carrot', 'green_bell_pepper'): 8,
#('mushroom', 'red_bell_pepper'): 7,


{('butter', 'olive_oil'): 546,
 ('water', 'chicken_broth'): 411,
 ('walnut', 'pecan'): 391,
 ('sugar', 'honey'): 372,
 ('margarine', 'butter'): 348,
 ('butter', 'margarine'): 319,
 ('pecan', 'walnut'): 271,
 ('vegetable_oil', 'applesauce'): 238,
 ('ground_beef', 'ground_turkey'): 229,
 ('egg', 'egg_white'): 221,
 ('onion', 'onion_powder'): 212,
 ('sour_cream', 'yogurt'): 211,
 ('water', 'beef_broth'): 199,
 ('water', 'milk'): 198,
 ('water', 'chicken_stock'): 193,
 ('onion', 'shallot'): 191,
 ('garlic_salt', 'garlic_powder'): 185,
 ('milk', 'buttermilk'): 181,
 ('vegetable_oil', 'olive_oil'): 178,
 ('butter', 'oil'): 168,
 ('shortening', 'butter'): 157,
 ('butter', 'coconut_oil'): 155,
 ('oil', 'applesauce'): 148,
 ('lemon_juice', 'lime_juice'): 147,
 ('water', 'broth'): 138,
 ('butter', 'applesauce'): 128,
 ('vegetable_oil', 'butter'): 127,
 ('onion', 'green_onion'): 125,
 ('sugar', 'stevia'): 124,
 ('green_pepper', 'red_pepper'): 123,
 ('raisin', 'dried_cranberry'): 123,
 ('heavy_cre

In [11]:
print(ground_truth_tuple_recipes[('egg_noodle', 'macaroni_noodle')])
print(ground_truth_tuple_counts[('egg_noodle', 'macaroni_noodle')])

['dd43625438', '0fe9a1105b', 'beb22afa59', '99ae52fa77', '5c7aeac637', 'f472881b2e', '4f73ae2f24']
7


In [10]:
one_hot_recipe_ingredients, failed_recipes = create_one_hot_ingredients_per_df(extended_recipes, ingredients, PATH_ONE_HOT_RECIPE_INGREDIENTS)
failed_recipes

[]

In [14]:
recipe_id = "ffb1c2b5ad"
distances_hamming = get_hamming_distances(recipe_id, one_hot_recipe_ingredients)


In [15]:
distances_hamming = list(set(distances_hamming))
distances_hamming = sorted(distances_hamming, key=lambda x: x[1])
distances_hamming

[('999d8c4b0d', 7),
 ('4b6a3ba513', 7),
 ('48dc15499a', 8),
 ('3ce8bc4f90', 8),
 ('b26281c482', 8),
 ('1602aa6218', 8),
 ('3abd84b687', 8),
 ('814df4eedd', 8),
 ('9297c2871e', 8),
 ('d500143819', 8),
 ('703bafb843', 8),
 ('b23374a4b9', 8),
 ('06f24af2ac', 8),
 ('45a362206d', 8),
 ('8f55c987f3', 9),
 ('3b779cd38b', 9),
 ('836b5e02d8', 9),
 ('e76ad6f09a', 9),
 ('669a68b20b', 9),
 ('10c95542ba', 9),
 ('ac88507fc0', 9),
 ('788aaab39a', 9),
 ('b4d4181e59', 9),
 ('1bfacdd84b', 9),
 ('79008d067d', 9),
 ('80c61e32a8', 9),
 ('f1d1c4dd5a', 9),
 ('a3d78df746', 9),
 ('a5118277b6', 9),
 ('ae4f8b7915', 9),
 ('0cb46cd1b6', 9),
 ('af0f5ab387', 9),
 ('bfa5a44e0a', 9),
 ('1ed61bae3f', 9),
 ('2196780e17', 9),
 ('b17964c27e', 9),
 ('e1015b6199', 9),
 ('6438193959', 9),
 ('74c68c90da', 9),
 ('e84d6756ee', 9),
 ('81efe2efbd', 9),
 ('0f7fa9360a', 9),
 ('9186f9c766', 9),
 ('1ac9b1f6fa', 9),
 ('e0702bd6b5', 9),
 ('5ac33cedde', 9),
 ('4b004edfad', 9),
 ('a8588618df', 9),
 ('76c50a9298', 9),
 ('bef14a917a', 9),


In [4]:
def getRecipeFromComments(recipeId, allComments):
    recipe = None
    for commentRecipe in allComments:
        gt_ingredient_found = False
        comment_recipe_id = commentRecipe['id']
        if recipeId == comment_recipe_id:
            if recipe is None:
                recipe = {}
                recipe["id"] = comment_recipe_id
                recipe["subs"] = []
                recipe["ingredients"] = commentRecipe["ingredients"]

            if recipe is not None:
                recipe["subs"].append(commentRecipe["subs"])

    return recipe

# def getRecipeIdsForGTSub(gt_sub, extended_recipes):
#     recipe_ids = []
#     for recipe in extended_recipes:
#         recipe_id = recipe["id"]
#         subs = recipe["subs"]
#         if isinstance(subs, list) and gt_sub in subs:
#             recipe_ids.append(recipe_id)
#         elif gt_sub == subs:
#             recipe_ids.append(recipe_id)
#     return recipe_ids

In [5]:
# sample_gt_sub = ('chicken', 'ground_turkey')
# sample_recipe_ids = getRecipeIdsForGTSub(sample_gt_sub, extended_recipes)
# sample_recipe_ids

['60cb16a27e', '6d6190d9f0', '8f30e98c97']

In [19]:
#ffb1c2b5ad
#999d8c4b0d
#4b6a3ba513

r1 = getRecipeFromComments("ffb1c2b5ad", extended_recipes)
r2 = getRecipeFromComments("999d8c4b0d", extended_recipes)
r3 = getRecipeFromComments("4b6a3ba513", extended_recipes)

In [20]:
r1

{'id': 'ffb1c2b5ad',
 'subs': [('onion', 'scallion')],
 'ingredients': [['flour'],
  ['puff_pastry_sheet', 'puff_pastry_sheets'],
  ['tuna'],
  ['onion', 'onions', 'round_onion'],
  ['celery'],
  ['mayonnaise',
   'lemon_mayonnaise',
   'wasabi_mayonnaise',
   'canola_oil_mayonnaise'],
  ['salt_and_pepper'],
  ['american_cheese'],
  ['egg_wash']]}

In [21]:
r2

{'id': '999d8c4b0d',
 'subs': [('onion', 'dill')],
 'ingredients': [['tilapia_fillet', 'tilapia_fillets', 'basa_fillets'],
  ['onion', 'onions', 'round_onion'],
  ['mayonnaise',
   'lemon_mayonnaise',
   'wasabi_mayonnaise',
   'canola_oil_mayonnaise'],
  ['salt_and_pepper']]}

In [22]:
r3

{'id': '4b6a3ba513',
 'subs': [('lemon_mayonnaise', 'unflavored_gelatin')],
 'ingredients': [['cream_cheese'],
  ['mayonnaise',
   'lemon_mayonnaise',
   'wasabi_mayonnaise',
   'canola_oil_mayonnaise'],
  ['celery'],
  ['onion', 'onions', 'round_onion']]}