# Imports

In [17]:
from cleaning_words import clean_phrases_in_list_spacy
from cleaning_words import remove_common_words
from cleaning_words import read_common_words
from doc_embeddings import TfidfEmbeddingVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import heapq

import dill
import pickle

# Constants

In [23]:
# --- Serialized embedding artifacts (read with dill in the "Load data" cells) ---
# TF-IDF vectorizer object for ingredients.
TFIDF_VECTORIZER_ING = "../word_embeddings_files/vectors/tfidf_vectorizer_ingredients.obj"
# Ingredient vectors keyed by recipe id (word2vec-based, judging by the file name).
WORD2VEC_ING_VECTORS = "../word_embeddings_files/vectors/word2vec_ingredients_vectors.obj"

# --- Pickled lookup tables used to display results ---
RECIPE_ID_TO_NAME = "../EDA_files/recipe_id_to_name.obj"
RECIPE_ID_TO_INGREDIENTS = "../EDA_files/recipe_id_to_ingredients.obj"

# --- Plain-text list of words removed from queries before embedding ---
COMMON_WORDS = "../cleaned_files/common_words.txt"


# Load data

In [3]:
# Read the common words that get_top_k_recommendations strips from a query
# (via remove_common_words) before it is embedded.
# NOTE(review): the returned container type is defined in cleaning_words — confirm.
common_words = read_common_words(COMMON_WORDS)

In [4]:
# Deserialize the ingredient TF-IDF vectorizer; it is later passed to
# get_top_k_recommendations, which calls its create_doc_vectors method.
# NOTE(review): dill.load can execute arbitrary code stored in the file —
# only load artifacts produced by this project.
with open(TFIDF_VECTORIZER_ING, 'rb') as pickle_file:
    tfidf_ingredients = dill.load(pickle_file)

In [5]:
# Deserialize the mapping of recipe id -> ingredient vector. The keys are used
# below to index recipe_id_to_name / recipe_id_to_ing, and each value is
# reshaped to a single row before cosine similarity is computed.
# NOTE(review): dill.load can execute arbitrary code stored in the file —
# only load artifacts produced by this project.
with open(WORD2VEC_ING_VECTORS, 'rb') as pickle_file:
    ingredient_vectors_dict = dill.load(pickle_file)

In [18]:
# Load the recipe id -> recipe name lookup used when printing recommendations.
# NOTE(review): pickle.load can execute arbitrary code stored in the file —
# only load artifacts produced by this project.
with open(RECIPE_ID_TO_NAME, 'rb') as pickle_file:
    recipe_id_to_name = pickle.load(pickle_file)

In [24]:
# Load the recipe id -> ingredient list lookup used when printing recommendations.
# NOTE(review): pickle.load can execute arbitrary code stored in the file —
# only load artifacts produced by this project.
with open(RECIPE_ID_TO_INGREDIENTS, 'rb') as pickle_file:
    recipe_id_to_ing = pickle.load(pickle_file)

In [11]:
def get_top_k_recommendations(query, vectors_dict, tfidifVectorizer, common_words, k=10):
    """Return the ``k`` entries of ``vectors_dict`` most similar to a text query.

    The query is split on whitespace, passed through
    ``clean_phrases_in_list_spacy``, stripped of ``common_words`` and embedded
    with ``tfidifVectorizer.create_doc_vectors``. Every vector in
    ``vectors_dict`` is then scored against that embedding with cosine
    similarity.

    Args:
        query: Raw query string (e.g. a list of ingredients).
        vectors_dict: Mapping of identifier -> vector; each vector must
            support ``reshape(1, -1)``.
        tfidifVectorizer: Object exposing ``create_doc_vectors(list_of_docs)``.
        common_words: Words removed from the query by ``remove_common_words``.
        k: Maximum number of results to return (default 10).

    Returns:
        A list of ``(identifier, similarity)`` tuples ordered from most to
        least similar, containing at most ``k`` items.
    """
    # NOTE(review): whitespace splitting leaves punctuation such as commas
    # attached to tokens; presumably the spaCy cleaning step deals with it — confirm.
    tokens = remove_common_words(
        clean_phrases_in_list_spacy(query.split()),
        common_words,
    )

    # Only one document (the query) is embedded, so its vector sits at index 0.
    doc_vectors = tfidifVectorizer.create_doc_vectors([tokens])

    def similarity_to_query(vector):
        # cosine_similarity returns a 1x1 matrix for a single pair of rows.
        return cosine_similarity(doc_vectors[0], vector.reshape(1, -1))[0][0]

    # Scores are produced lazily; nlargest consumes them in dictionary order.
    scored = (
        (identifier, similarity_to_query(vector))
        for identifier, vector in vectors_dict.items()
    )
    return heapq.nlargest(k, scored, key=lambda pair: pair[1])

In [12]:
# Example free-text ingredient query used to exercise the recommender below.
query = 'fresh tomatoes, onion, cheese, pasta'

In [13]:
# Score every recipe vector against the example query; k is left at its
# default of 10, so at most ten (recipe_id, similarity) pairs are returned.
top_k = get_top_k_recommendations(query, ingredient_vectors_dict, tfidf_ingredients, common_words)

In [26]:
# Print each recommendation: 1-based rank with its similarity score, then the
# recipe name and its ingredient list, followed by a blank separator line.
for rank, (recipe_id, sim) in enumerate(top_k, start=1):
    print(rank, "(", sim, ")")
    print(recipe_id_to_name[recipe_id])
    print(recipe_id_to_ing[recipe_id])
    print()

1 ( 0.8328527658055862 )
Chili & Cheese Ramen Wonder
['pasta', 'diced tomatoes', 'cheese', 'cheese']

2 ( 0.8315685773041676 )
Low-Carb Pizza
['pasta', 'cheese', 'onion']

3 ( 0.7857862457487965 )
Vegeroni
['pasta', 'cheese']

4 ( 0.7857862457487965 )
Sweet Lunchbox Noodles
['pasta', 'cheese']

5 ( 0.7668878577401667 )
Pasta Seafood Salad
['pasta', 'red onion']

6 ( 0.7622390042322245 )
Spicy Italian Sausage Penne
['penne pasta', 'tomato sauce', 'onion', 'garlic cloves', 'olive oil', 'roma tomatoes', 'fresh basil', 'fresh parsley', 'anchovy paste', 'salt', 'pepper', 'parmesan cheese']

7 ( 0.760246560080287 )
Pasta Skillet
['bacon', 'onions', 'garlic', 'green pepper', 'stewed tomatoes', 'pasta']

8 ( 0.7567656036754211 )
Tuna Spaghetti Casserole
['tuna fish', 'spaghetti', 'onion', 'garlic', 'tomatoes', 'sugar', 'pepper', 'parsley', 'cheese', 'salt']

9 ( 0.7552504682595351 )
Delicious Quick Pasta &amp; Sauce
['olive oil', 'onion', 'garlic cloves', 'tomato sauce', 'salt', 'thyme', 'red 

In [27]:
# Display the raw (recipe_id, similarity) pairs returned by the recommender.
top_k

[(24822, 0.8328527658055862),
 (525779, 0.8315685773041676),
 (221786, 0.7857862457487965),
 (233698, 0.7857862457487965),
 (23239, 0.7668878577401667),
 (305692, 0.7622390042322245),
 (29211, 0.760246560080287),
 (20843, 0.7567656036754211),
 (8731, 0.7552504682595351),
 (419077, 0.7548709891388846)]