# Imports

In [17]:
import heapq
import html
import pickle

import dill
from sklearn.metrics.pairwise import cosine_similarity

from cleaning_words import clean_phrases_in_list_spacy
from cleaning_words import read_common_words
from cleaning_words import remove_common_words
from doc_embeddings import TfidfEmbeddingVectorizer

# Constants

In [23]:
# Text file handed to read_common_words in the "Load data" section.
COMMON_WORDS = "../cleaned_files/common_words.txt"
# dill-serialized object loaded as `tfidf_ingredients`.
TFIDF_VECTORIZER_ING = "../word_embeddings_files/vectors/tfidf_vectorizer_ingredients.obj"
# dill-serialized object loaded as `ingredient_vectors_dict`.
WORD2VEC_ING_VECTORS = "../word_embeddings_files/vectors/word2vec_ingredients_vectors.obj"
# pickle-serialized lookup loaded as `recipe_id_to_name`.
RECIPE_ID_TO_NAME = "../EDA_files/recipe_id_to_name.obj"
# pickle-serialized lookup loaded as `recipe_id_to_ing`.
RECIPE_ID_TO_INGREDIENTS = "../EDA_files/recipe_id_to_ingredients.obj"


# Load data

In [3]:
# Read the common-word list from the COMMON_WORDS path. The result is later
# passed to remove_common_words to filter the cleaned query tokens.
common_words = read_common_words(COMMON_WORDS)

In [4]:
# Deserialize the object stored at TFIDF_VECTORIZER_ING. It is later passed to
# the recommendation functions, which call its create_doc_vectors method.
# NOTE(review): dill.load, like pickle.load, can execute arbitrary code from
# the file -- only load artifacts produced by this project.
with open(TFIDF_VECTORIZER_ING, 'rb') as pickle_file:
    tfidf_ingredients = dill.load(pickle_file)

In [5]:
# Deserialize the mapping stored at WORD2VEC_ING_VECTORS. The recommendation
# code iterates its .items() and calls .reshape(1, -1) on each value.
# NOTE(review): keys are presumably recipe ids (they are used to index the
# recipe lookup tables when printing) -- confirm against the producing notebook.
# NOTE(review): dill.load can execute arbitrary code from the file -- only
# load artifacts produced by this project.
with open(WORD2VEC_ING_VECTORS, 'rb') as pickle_file:
    ingredient_vectors_dict = dill.load(pickle_file)

In [18]:
# Deserialize the lookup stored at RECIPE_ID_TO_NAME; print_recommendations
# indexes it by recipe id to obtain the title it prints.
# NOTE(review): pickle.load can execute arbitrary code from the file -- only
# load artifacts produced by this project.
with open(RECIPE_ID_TO_NAME, 'rb') as pickle_file:
    recipe_id_to_name = pickle.load(pickle_file)

In [24]:
# Deserialize the lookup stored at RECIPE_ID_TO_INGREDIENTS;
# print_recommendations indexes it by recipe id to obtain the ingredients it prints.
# NOTE(review): pickle.load can execute arbitrary code from the file -- only
# load artifacts produced by this project.
with open(RECIPE_ID_TO_INGREDIENTS, 'rb') as pickle_file:
    recipe_id_to_ing = pickle.load(pickle_file)

# Recommendation functions

In [11]:
def get_top_k_recommendations(query, vectors_dict, tfidifVectorizer, common_words, k=10):
    """Rank the entries of ``vectors_dict`` by cosine similarity to a text query.

    The query is split on whitespace, cleaned with
    ``clean_phrases_in_list_spacy``, stripped of ``common_words`` via
    ``remove_common_words`` and embedded with
    ``tfidifVectorizer.create_doc_vectors``. Every value of ``vectors_dict``
    is then scored against that embedding.

    Args:
        query: Free-text query string.
        vectors_dict: Mapping from a key to a vector exposing ``reshape``;
            keys are returned unchanged alongside their score.
        tfidifVectorizer: Object providing ``create_doc_vectors(list_of_docs)``.
        common_words: Collection forwarded to ``remove_common_words``.
        k: Maximum number of results to return (default 10).

    Returns:
        A list of at most ``k`` ``(key, similarity)`` tuples ordered from the
        highest similarity to the lowest.
    """
    # NOTE(review): str.split() separates on whitespace only, so a
    # comma-separated query yields tokens such as "onion," -- presumably
    # clean_phrases_in_list_spacy strips the punctuation; confirm.
    tokens = clean_phrases_in_list_spacy(query.split())
    tokens = remove_common_words(tokens, common_words)

    # Only one document is embedded, so the first entry is the query vector.
    # Assumes that entry is already 2-D, since it is handed to
    # cosine_similarity without a reshape -- TODO confirm.
    query_vector = tfidifVectorizer.create_doc_vectors([tokens])[0]

    scored = [
        (key, cosine_similarity(query_vector, vector.reshape(1, -1))[0][0])
        for key, vector in vectors_dict.items()
    ]

    return heapq.nlargest(k, scored, key=lambda pair: pair[1])

In [28]:
def print_recommendations(top_k):
    """Print a numbered listing of ranked recipes.

    For every ``(recipe_id, similarity)`` pair this prints, in order: the
    1-based rank together with the similarity, the recipe name looked up in
    the module-level ``recipe_id_to_name``, the ingredients looked up in the
    module-level ``recipe_id_to_ing``, and a blank separator line.

    HTML character references in string names (for example ``&amp;``) are
    decoded before printing so titles read as plain text.

    Args:
        top_k: Iterable of ``(recipe_id, similarity)`` pairs in display order.

    Raises:
        Whatever the lookup tables raise for an unknown recipe id
        (``KeyError`` if they are plain dicts).
    """
    for rank, (recipe_id, sim) in enumerate(top_k, start=1):
        print(rank, "(", sim, ")")

        name = recipe_id_to_name[recipe_id]
        # Some stored titles still carry HTML entities ("Pasta &amp; Sauce");
        # decode them for display and leave non-string values untouched.
        if isinstance(name, str):
            name = html.unescape(name)
        print(name)

        print(recipe_id_to_ing[recipe_id])
        print()

In [33]:
def get_recommendations(query, vectors_dict, tfidifVectorizer, common_words, k=10):
    """Compute the top-``k`` matches for ``query`` and print them.

    Forwards all arguments to ``get_top_k_recommendations`` and hands the
    resulting list to ``print_recommendations``. Returns ``None``.
    """
    print_recommendations(
        get_top_k_recommendations(
            query, vectors_dict, tfidifVectorizer, common_words, k=k
        )
    )

In [34]:
# Example query: a comma-separated list of ingredients.
# NOTE(review): get_top_k_recommendations tokenizes with str.split(), which
# splits on whitespace only, so the tokens keep their trailing commas unless
# clean_phrases_in_list_spacy removes them -- confirm.
query = 'fresh tomatoes, onion, cheese, pasta, bread, egg'

In [36]:
# Rank the loaded ingredient vectors against the example query and print the
# matches; k is left at its default of 10.
get_recommendations(query, ingredient_vectors_dict, tfidf_ingredients, common_words)

1 ( 0.8005910885943857 )
Low-Carb Pizza
['pasta', 'cheese', 'onion']

2 ( 0.7994824888145369 )
Chili & Cheese Ramen Wonder
['pasta', 'diced tomatoes', 'cheese', 'cheese']

3 ( 0.7606241357571717 )
Vegeroni
['pasta', 'cheese']

4 ( 0.7606241357571717 )
Sweet Lunchbox Noodles
['pasta', 'cheese']

5 ( 0.7291152819926507 )
Pasta Seafood Salad
['pasta', 'red onion']

6 ( 0.7238693450007452 )
Smart Alec Pork Neck Bones Spaghetti - Pressure Cooker Style
['pasta', 'cheese', 'water']

7 ( 0.7174492260338161 )
Venison Cacciatore
['onions', 'green peppers', 'bacon', 'crushed tomatoes', 'tomatoes', 'penne pasta']

8 ( 0.7130828826644078 )
Delicious Quick Pasta &amp; Sauce
['olive oil', 'onion', 'garlic cloves', 'tomato sauce', 'salt', 'thyme', 'red pepper flakes', 'fresh basil leaf', 'mozzarella cheese', 'monterey jack cheese', 'cheddar cheese', 'parmesan cheese', 'pasta']

9 ( 0.7129608882777569 )
Pasta Fiesta
['tomatoes', 'fresh basil', 'garlic cloves', 'olive oil', 'rotini pasta', 'penne', 'che