# Recommender Model

In [1]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
import scipy.sparse
import pickle

In [2]:
# load precomputed tfidf matrix and vectorizer
tfidf_matrix = scipy.sparse.load_npz('models/tfidf_matrix.npz')
with open('models/tfidf_vectorizer.pkl', 'rb') as f:
    vectorizer = pickle.load(f)
data = pd.read_csv('Datasets/recipes_food_com.csv')

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


FileNotFoundError: [Errno 2] No such file or directory: 'Datasets/recipes_food_com.csv'

Test combined cleaned data

In [30]:
# Initialize NearestNeighbors model and fit on the tfidf_matrix
nearest_neighbors = NearestNeighbors(metric='cosine', algorithm='brute')
nearest_neighbors.fit(tfidf_matrix)

def recommend(ingredients_list, top_n=5):
    # Transform the user's input ingredients into the vector space
    user_vector = vectorizer.transform([ingredients_list])
    
    # Find the top N nearest neighbors
    distances, indices = nearest_neighbors.kneighbors(user_vector, n_neighbors=top_n)
    
    # Retrieve recommended recipes and their similarity scores
    recommendations = data.iloc[indices[0]].copy()
    recommendations['Similarity'] = 1 - distances[0]  # Similarity = 1 - distance (cosine)
    
    return recommendations[['Name', 'Similarity', 'IngredientsExtracted']]


In [31]:
ingredients_list = 'tomato bread beef carrot'

recommend(ingredients_list)

Unnamed: 0,Name,Similarity,IngredientsExtracted
404267,Fred's Favorite Meatloaf,0.674316,ground beef bread onion tomato sauce
86138,Hamburger Soup (Real Comfort Food!),0.59283,ground beef beef carrot salt pepper
434974,Vegan Energizing Juice for Juicer,0.561279,carrot
108704,Honey -Bourbon Glazed Carrots,0.561279,carrot
427040,Brown Windsor Beef Soup,0.549752,onion carrot beef beef broth parsley chopped


In [2]:
tfidf_matrix = scipy.sparse.load_npz('models/tfidf_matrix_updated.npz')
with open('models/tfidf_vectorizer_updated.pkl', 'rb') as f:
    vectorizer = pickle.load(f)
data = pd.read_csv('Data/recipes_food_com_cleaned.csv')

In [4]:
nearest_neighbors = NearestNeighbors(metric='cosine', algorithm='brute')
nearest_neighbors.fit(tfidf_matrix)

def recommend(ingredients_list, top_n=5):
    # Transform the user's input ingredients into the vector space
    # Input is a single string
    user_vector = vectorizer.transform([ingredients_list])
    
    # Find the top N nearest neighbors
    distances, indices = nearest_neighbors.kneighbors(user_vector, n_neighbors=top_n)
    
    # Retrieve recommended recipes and their similarity scores
    recommendations = data.iloc[indices[0]].copy()
    recommendations['Similarity'] = 1 - distances[0]  # Similarity = 1 - distance (cosine)
    
    return recommendations[['Name', 'Similarity', 'Cleaned_Ingredients']]

In [5]:
ingredients_list = 'tomato bread beef carrot'

recommend(ingredients_list)

Unnamed: 0,Name,Similarity,Cleaned_Ingredients
404267,Fred's Favorite Meatloaf,0.620458,"['salt', 'ground beef', 'pepper', 'bread', 'eg..."
120679,Deanna's Meatloaf,0.592596,"['mustard', 'salt', 'ground beef', 'cheese', '..."
460123,"Courtney's ""picky Eater's Special"" Stew",0.565443,"['ground beef', 'lean ground beef', 'tomato ju..."
218566,Vegetable Beef Noodle Soup,0.550052,"['ground beef', 'water', 'tomato', 'beef ramen..."
184914,Ground Beef and Cabbage Stew,0.522353,"['celery rib', 'ground beef', 'beef broth', 'c..."


In [6]:
ingredients_list = 'egg'
recommend(ingredients_list)

Unnamed: 0,Name,Similarity,Cleaned_Ingredients
368767,Self-Peeling Hard-Cooked Eggs,1.0,"['', 'egg']"
81436,Easter Hard Boiled Eggs,1.0,['egg']
469146,Egg in a Microwave,0.881092,"['salt', 'egg']"
176306,Homemade Pasta,0.79047,"['egg', 'flour']"
235313,Model's Honey Egg Facial Mask,0.788446,"['egg white egg egg whites', 'honey']"


In [7]:
ingredients_list = 'eggs'
recommend(ingredients_list)

Unnamed: 0,Name,Similarity,Cleaned_Ingredients
90713,Hard Boiled Eggs in the Oven,1.0,['eggs']
271988,Aunt Ruth's Hot Milk Sponge Cake,0.909798,"['salt', 'sugar', 'baking powder', 'eggs', 'mi..."
44676,Grandmother's Eggs,0.86631,"['fresh eggs', 'butter', 'eggs']"
100661,Perfect Hard Boiled Eggs (Technique),0.86296,"['salt', 'eggs']"
480838,Garlic Eggs Cooked on the Plate,0.833537,"['salt', 'eggs eggs', 'garlic powder', 'eggs',..."


In [8]:
ingredients_list = 'egg vanilla'
recommend(ingredients_list)

Unnamed: 0,Name,Similarity,Cleaned_Ingredients
457100,Quick and Easy Frosting,0.900487,"['', 'egg', 'vanilla', 'sugar']"
3286,Nonfat Caramel Candy,0.789153,"['egg white', 'brown sugar', 'vanilla extract'..."
226113,Egg Cheese,0.750257,"['salt', 'sugar', 'milk', 'vanilla', 'egg']"
137720,Grandma's Butter Cookies,0.744435,"['sugar', 'vanilla', 'butter', 'egg', 'flour']"
270950,Swedish Spritz Cookies,0.744435,"['sugar', 'vanilla', 'butter', 'egg', 'flour']"


In [9]:
ingredients_list = 'eggs vanilla'
recommend(ingredients_list)

Unnamed: 0,Name,Similarity,Cleaned_Ingredients
331339,Teething Biscuits,0.792333,"['sugar', 'vanilla', 'eggs', 'flour']"
219701,Lou's Easy Delicious Custard,0.770616,"['sugar', 'milk', 'vanilla', 'eggs']"
266339,Cr&egrave;me Brulee (Romanian Style),0.770616,"['sugar', 'milk', 'vanilla', 'eggs']"
207328,Milo French Toast,0.763898,"['vanilla', 'milk', 'butter', 'eggs']"
259331,Vanilla Cream Cheese Pancakes,0.759464,"['cream cheese', 'vanilla', 'eggs']"


In [10]:
ingredients_list = 'Beef Tomato Egg Rice'
recommend(ingredients_list)

Unnamed: 0,Name,Similarity,Cleaned_Ingredients
23257,Ultra Simple Porcupine Meatballs in Tomato Sauce,0.69084,"['ground beef', 'condensed tomato soup', 'onio..."
474922,Mom's Porcupine Meatballs,0.647695,"['tomato soup', 'salt', 'ground beef', 'pepper..."
363144,Porcupine Meatballs,0.64759,"['tomato soup', 'ground beef', 'eggs', 'onion'..."
256374,Mom's Meatloaf,0.625209,"['tomato soup', 'salt', 'ground beef', 'pepper..."
318965,Goulash,0.621876,"['tomato soup', 'green pepper', 'ground beef',..."


Save Model

In [43]:


# Save the nearest neighbors model
with open('models/nearest_neighbors_model.pkl', 'wb') as f:
    pickle.dump(nearest_neighbors, f)

Update model versions

In [4]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(data['Cleaned_Ingredients'])

with open('models/tfidf_vectorizer_updated.pkl', 'wb') as f:
    pickle.dump(vectorizer, f)

scipy.sparse.save_npz('models/tfidf_matrix_updated.npz', tfidf_matrix)

NameError: name 'data' is not defined

Test nltk parsed data

In [12]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
import scipy.sparse
import pickle

In [21]:
tfidf_matrix = scipy.sparse.load_npz('models/tfidf_matrix_updated.npz')
with open('models/tfidf_vectorizer_updated.pkl', 'rb') as f:
    vectorizer = pickle.load(f)
data = pd.read_csv('NLTK.csv')

In [15]:
nearest_neighbors = NearestNeighbors(metric='cosine', algorithm='brute')
nearest_neighbors.fit(tfidf_matrix)

def recommend(ingredients_list, top_n=5):
    # Transform the user's input ingredients into the vector space
    user_vector = vectorizer.transform([ingredients_list])
    
    # Find the top N nearest neighbors
    distances, indices = nearest_neighbors.kneighbors(user_vector, n_neighbors=top_n)
    
    # Retrieve recommended recipes and their similarity scores
    recommendations = data.iloc[indices[0]].copy()
    recommendations['Similarity'] = 1 - distances[0]  # Similarity = 1 - distance (cosine)
    
    return recommendations[['Name', 'Similarity', 'Cleaned_Ingredients']]

In [16]:
ingredients_list = 'tomato bread beef carrot'

recommend(ingredients_list)

Unnamed: 0,Name,Similarity,Cleaned_Ingredients
404267,Fred's Favorite Meatloaf,0.620458,"['salt', 'ground beef', 'pepper', 'bread', 'eg..."
120679,Deanna's Meatloaf,0.592596,"['mustard', 'salt', 'ground beef', 'cheese', '..."
460123,"Courtney's ""picky Eater's Special"" Stew",0.565443,"['ground beef', 'lean ground beef', 'tomato ju..."
218566,Vegetable Beef Noodle Soup,0.550052,"['ground beef', 'water', 'tomato', 'beef ramen..."
184914,Ground Beef and Cabbage Stew,0.522353,"['celery rib', 'ground beef', 'beef broth', 'c..."


In [17]:
ingredients_list = 'egg'

recommend(ingredients_list)

Unnamed: 0,Name,Similarity,Cleaned_Ingredients
368767,Self-Peeling Hard-Cooked Eggs,1.0,"['', 'egg']"
81436,Easter Hard Boiled Eggs,1.0,['egg']
469146,Egg in a Microwave,0.881092,"['salt', 'egg']"
176306,Homemade Pasta,0.79047,"['egg', 'flour']"
235313,Model's Honey Egg Facial Mask,0.788446,"['egg white egg egg whites', 'honey']"


In [20]:
ingredients_list = 'eggs'

recommend(ingredients_list)

Unnamed: 0,Name,Similarity,Cleaned_Ingredients
90713,Hard Boiled Eggs in the Oven,1.0,['eggs']
271988,Aunt Ruth's Hot Milk Sponge Cake,0.909798,"['salt', 'sugar', 'baking powder', 'eggs', 'mi..."
44676,Grandmother's Eggs,0.86631,"['fresh eggs', 'butter', 'eggs']"
100661,Perfect Hard Boiled Eggs (Technique),0.86296,"['salt', 'eggs']"
480838,Garlic Eggs Cooked on the Plate,0.833537,"['salt', 'eggs eggs', 'garlic powder', 'eggs',..."
