# Recommender Model

In [None]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
import scipy.sparse
import pickle

In [16]:
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
import nltk
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\annie\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [17]:
# initialize the lemmatizer
lemmatizer = WordNetLemmatizer()

In [37]:
# define a function to lemmatize an input string
def lemmatize_string(string):
    string_lower = string.lower()
    # tokenize the string into individual words
    tokens = word_tokenize(string_lower)
    # lemmatize each token
    lemmatized_tokens = [lemmatizer.lemmatize(token) for token in tokens]
    return ' '.join(lemmatized_tokens)

# define a function to lemmatize an input list
def lemmatize_list(list):
    return [lemmatizer.lemmatize(item.lower()) for item in list]

In [2]:
# load precomputed tfidf matrix and vectorizer
tfidf_matrix = scipy.sparse.load_npz('models/tfidf_matrix.npz')
with open('models/tfidf_vectorizer.pkl', 'rb') as f:
    vectorizer = pickle.load(f)
data = pd.read_csv('Datasets/recipes_food_com_revised.csv')

Test combined cleaned data

In [35]:
# Initialize NearestNeighbors model and fit on the tfidf_matrix
nearest_neighbors = NearestNeighbors(metric='cosine', algorithm='brute')
nearest_neighbors.fit(tfidf_matrix)

def recommend(preferred_ingredients, top_n=5, excluded_ingredients=None):
    if excluded_ingredients is None:
        excluded_ingredients = []
    # if excluded_ingredients is a string, convert to list
    elif isinstance(excluded_ingredients, str):
        excluded_ingredients = excluded_ingredients.lower().split(', ')

    # preprocess input by lemmatizing
    lemmatized_ingredients = lemmatize_string(preferred_ingredients)

    # Transform the user's input ingredients into the vector space
    user_vector = vectorizer.transform([lemmatized_ingredients])
    
    # Find the top N nearest neighbors
    distances, indices = nearest_neighbors.kneighbors(user_vector, n_neighbors=top_n)
    
    # Retrieve recommended recipes and their similarity scores
    recommendations = data.iloc[indices[0]].copy()
    recommendations['Similarity'] = 1 - distances[0]  # Similarity = 1 - distance (cosine)

    # filter out recipes containing excluded ingredients
    lemmatized_excluded_ingredients = lemmatize_list(excluded_ingredients)
    def contains_excluded(ingredients):
        for excluded in lemmatized_excluded_ingredients:
            if excluded in ingredients:
                return True
        return False
    
    recommendations = recommendations[~recommendations['NLP_Ingredients'].apply(contains_excluded)]
    
    return recommendations[['Name', 'Similarity', 'IngredientsExtracted', 'NLP_Ingredients']]


In [27]:
ingredients_list = 'tomato bread beef carrot'

recommend(ingredients_list)

Unnamed: 0,Name,Similarity,IngredientsExtracted,NLP_Ingredients
259308,Mutt Meatballs,0.689196,"('ground beef', 'cheddar cheese', 'carrot', 'b...","('bread', 'carrot', 'cheddar cheese', 'egg', '..."
158400,Vegetable Beef Noodle Soup,0.659529,"('ground beef', 'chopped tomato', 'carrot', 'c...","('beef', 'carrot', 'celery', 'ground beef', 't..."
323064,Gerden Meatballs,0.647784,"('ground beef', 'carrot', 'broccoli', 'bread',...","('bread', 'broccoli', 'carrot', 'garlic clove'..."
359455,Fred's Favorite Meatloaf,0.638803,"('ground beef', 'bread', 'eggs', 'onion', 'tom...","('bread', 'egg', 'ground beef', 'onion', 'pepp..."
365102,Meatloaf,0.63502,"('ground beef', 'beef gravy', 'white bread', '...","('beef gravy', 'carrot', 'egg', 'ground beef',..."


In [39]:
ingredients_list = 'tomato bread beef carrot'
excluded_ingredients = 'cheese'

recommend(ingredients_list, excluded_ingredients=excluded_ingredients)

Unnamed: 0,Name,Similarity,IngredientsExtracted,NLP_Ingredients
158400,Vegetable Beef Noodle Soup,0.659529,"('ground beef', 'chopped tomato', 'carrot', 'c...","('beef', 'carrot', 'celery', 'ground beef', 't..."
323064,Gerden Meatballs,0.647784,"('ground beef', 'carrot', 'broccoli', 'bread',...","('bread', 'broccoli', 'carrot', 'garlic clove'..."
359455,Fred's Favorite Meatloaf,0.638803,"('ground beef', 'bread', 'eggs', 'onion', 'tom...","('bread', 'egg', 'ground beef', 'onion', 'pepp..."
365102,Meatloaf,0.63502,"('ground beef', 'beef gravy', 'white bread', '...","('beef gravy', 'carrot', 'egg', 'ground beef',..."


In [28]:
ingredients_list = 'egg'
recommend(ingredients_list)

Unnamed: 0,Name,Similarity,IngredientsExtracted,NLP_Ingredients
66619,Freezing Eggs,1.0,"('egg', 'egg')","('egg',)"
106260,Easter Hard Boiled Eggs,1.0,"('egg', 'egg')","('egg',)"
457727,Perfect Hard Boiled Eggs,1.0,"('eggs', 'eggs')","('egg',)"
365466,Sunny-Side Scramblers,1.0,"('eggs', 'half-and-half')","('egg',)"
238585,Pastry Egg Wash,0.809612,"('egg', 'half-and-half', 'salt')","('egg', 'salt')"


In [38]:
ingredients_list = 'Eggs'
recommend(ingredients_list)

Unnamed: 0,Name,Similarity,IngredientsExtracted,NLP_Ingredients
66619,Freezing Eggs,1.0,"('egg', 'egg')","('egg',)"
106260,Easter Hard Boiled Eggs,1.0,"('egg', 'egg')","('egg',)"
457727,Perfect Hard Boiled Eggs,1.0,"('eggs', 'eggs')","('egg',)"
365466,Sunny-Side Scramblers,1.0,"('eggs', 'half-and-half')","('egg',)"
238585,Pastry Egg Wash,0.809612,"('egg', 'half-and-half', 'salt')","('egg', 'salt')"


In [41]:
ingredients_list = 'egg vanilla'
recommend(ingredients_list)

Unnamed: 0,Name,Similarity,IngredientsExtracted,NLP_Ingredients
412341,Teething Biscuits,0.765222,"('sugar', 'eggs', 'vanilla', 'flour')","('egg', 'flour', 'sugar', 'vanilla')"
146772,Aunt Emma's Old Fashioned Egg Nog,0.751238,"('egg', 'powdered sugar', 'vanilla')","('egg', 'powder sugar', 'vanilla')"
451431,Lou's Easy Delicious Custard,0.73946,"('eggs', 'sugar', 'milk', 'vanilla')","('egg', 'milk', 'sugar', 'vanilla')"
262031,Crème Brulee (Romanian Style),0.73946,"('milk', 'eggs', 'sugar', 'vanilla')","('egg', 'milk', 'sugar', 'vanilla')"
311643,Vanilla Cream Cheese Pancakes,0.730883,"('eggs', 'cream cheese', 'vanilla')","('cream cheese', 'egg', 'vanilla')"


In [40]:
ingredients_list = 'eggs vanilla'
recommend(ingredients_list)

Unnamed: 0,Name,Similarity,IngredientsExtracted,NLP_Ingredients
412341,Teething Biscuits,0.765222,"('sugar', 'eggs', 'vanilla', 'flour')","('egg', 'flour', 'sugar', 'vanilla')"
146772,Aunt Emma's Old Fashioned Egg Nog,0.751238,"('egg', 'powdered sugar', 'vanilla')","('egg', 'powder sugar', 'vanilla')"
451431,Lou's Easy Delicious Custard,0.73946,"('eggs', 'sugar', 'milk', 'vanilla')","('egg', 'milk', 'sugar', 'vanilla')"
262031,Crème Brulee (Romanian Style),0.73946,"('milk', 'eggs', 'sugar', 'vanilla')","('egg', 'milk', 'sugar', 'vanilla')"
311643,Vanilla Cream Cheese Pancakes,0.730883,"('eggs', 'cream cheese', 'vanilla')","('cream cheese', 'egg', 'vanilla')"


In [42]:
ingredients_list = 'Beef Tomato Egg Rice'
recommend(ingredients_list)

Unnamed: 0,Name,Similarity,IngredientsExtracted,NLP_Ingredients
236164,Lazy Stuffed Cabbage Casserole,0.69762,"('beef', 'cooked rice', 'onion', 'butter', 'eg...","('beef', 'butter', 'cabbage', 'egg', 'onion', ..."
188536,Brown Rice With Meat,0.693013,"('butter', 'ground beef', 'brown rice', 'water...","('brown rice', 'butter', 'ground beef', 'tomat..."
220032,Porcupine Meatballs,0.691879,"('tomato soup', 'ground beef', 'uncooked rice'...","('egg', 'ground beef', 'onion', 'parsley', 'ri..."
160741,Maltese Baked Rice (Better Than the Original!!),0.689187,"('corned beef', 'tomato paste', 'eggs', 'rice'...","('corn beef', 'egg', 'oil', 'rice', 'sugar', '..."
458247,Pressure Cooker Meatballs,0.673987,"('ground beef', 'uncooked rice', 'salt', 'pepp...","('ground beef', 'onion', 'pepper', 'rice', 'sa..."


In [43]:
ingredients_list = 'beef tomato egg rice'
recommend(ingredients_list)

Unnamed: 0,Name,Similarity,IngredientsExtracted,NLP_Ingredients
236164,Lazy Stuffed Cabbage Casserole,0.69762,"('beef', 'cooked rice', 'onion', 'butter', 'eg...","('beef', 'butter', 'cabbage', 'egg', 'onion', ..."
188536,Brown Rice With Meat,0.693013,"('butter', 'ground beef', 'brown rice', 'water...","('brown rice', 'butter', 'ground beef', 'tomat..."
220032,Porcupine Meatballs,0.691879,"('tomato soup', 'ground beef', 'uncooked rice'...","('egg', 'ground beef', 'onion', 'parsley', 'ri..."
160741,Maltese Baked Rice (Better Than the Original!!),0.689187,"('corned beef', 'tomato paste', 'eggs', 'rice'...","('corn beef', 'egg', 'oil', 'rice', 'sugar', '..."
458247,Pressure Cooker Meatballs,0.673987,"('ground beef', 'uncooked rice', 'salt', 'pepp...","('ground beef', 'onion', 'pepper', 'rice', 'sa..."


In [47]:
ingredients_list = 'chicken noodles chives onion'
excluded_ingredients = 'eggplant, cheese'
recommend(ingredients_list, excluded_ingredients=excluded_ingredients)

Unnamed: 0,Name,Similarity,IngredientsExtracted,NLP_Ingredients
225031,Noodles With Chives,0.738955,"('egg noodles', 'butter', 'chicken stock', 'fr...","('butter', 'chicken stock', 'egg noodle', 'fre..."
190912,Chive Salt,0.623669,"('chives', 'salt')","('chive', 'salt')"


Save Model

In [None]:
# Save the nearest neighbors model
with open('models/nearest_neighbors_model.pkl', 'wb') as f:
    pickle.dump(nearest_neighbors, f)