In [1]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from sklearn.feature_extraction.text import TfidfVectorizer
import pickle

# Load data
file = 'recipe_final (1).csv'
recipes = pd.read_csv(file)

# Preprocessing: Use the ingredients_list field and limit vocabulary to reduce model size.
# Blank CSV cells are read by pandas as NaN, which TfidfVectorizer rejects as an
# invalid document. Replace them with empty strings (rather than dropping rows)
# so that row i of X_ing still corresponds to recipes.iloc[i].
vectorizer = TfidfVectorizer(max_features=5000)
X_ing = vectorizer.fit_transform(recipes['ingredients_list'].fillna(''))

# Fit KNN using a sparse matrix and the brute algorithm (which supports sparse matrices).
# NOTE: TfidfVectorizer L2-normalises rows by default, so for non-empty rows
# ranking by euclidean distance gives the same order as ranking by cosine similarity.
KNN = NearestNeighbors(n_neighbors=3, metric='euclidean', algorithm='brute')
KNN.fit(X_ing)

def rec_recipes(query_text, n_neighbors=None):
    """
    Given a query ingredient text, return recommended recipes.

    The query is vectorised with the module-level fitted ``vectorizer`` and
    matched against the module-level fitted ``KNN`` index; the resulting row
    positions are looked up in the module-level ``recipes`` DataFrame.

    Parameters:
    - query_text: string representing the ingredient(s) query.
    - n_neighbors: optional number of recipes to return. When None (default),
      the value the KNN model was constructed with is used.

    Returns:
    - A pandas DataFrame with the recommended recipes (recipe_name and ingredients_list).
      If none of the query's tokens are in the vectorizer's vocabulary, an
      empty DataFrame with the same two columns is returned.
    """
    output_columns = ['recipe_name', 'ingredients_list']
    query_vec = vectorizer.transform([query_text])

    # A query with no known tokens becomes an all-zero vector. Its "nearest"
    # recipes would be arbitrary, so report no recommendations instead.
    if query_vec.nnz == 0:
        return recipes.iloc[0:0][output_columns]

    # Only the neighbour positions are needed; skip returning the distances.
    indexes = KNN.kneighbors(
        query_vec, n_neighbors=n_neighbors, return_distance=False
    )
    # kneighbors yields one row of positions per query; there is a single query.
    recs = recipes.iloc[indexes[0]]
    return recs[output_columns]

# Smoke-test the recommender with a sample one-word query and show the result.
input_text = 'Cereal'
recommended_recipes = rec_recipes(input_text)
print(recommended_recipes)

# Persist the fitted artifacts to disk: the vectorizer first, then the KNN model.
for pickle_path, fitted_obj in (('vectorizer.pkl', vectorizer), ('model.pkl', KNN)):
    with open(pickle_path, 'wb') as f:
        pickle.dump(fitted_obj, f)


                    recipe_name  \
16877         Toasted Party Mix   
15606  Nuts and Bolts Party Mix   
44342     Six Week Bran Muffins   

                                        ingredients_list  
16877  ['margarine', 'seasoning salt', 'Worcestershir...  
15606  ['chopped peanuts', 'mixed nuts', 'puffed whea...  
44342  ['boiling water', 'bran cereal', 'shortening',...  
