In [1]:
import re
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors

In [22]:
df = pd.read_csv('../data/recipes.csv')

In [23]:
df.shape

(767, 6)

In [24]:
df.head()

Unnamed: 0,image,title,description,total time,ingredients,instructions
0,https://pinchofyum.com/wp-content/uploads/Ital...,Big Yummy Italian Salad,"I love a big Italian salad! Crisp romaine, sal...",15 minutes,chees like provolon romain lettuc splash peppe...,['Prep Salad Stuff: Chop all your salad veggie...
1,https://pinchofyum.com/wp-content/uploads/Cris...,Crispy Rice Salad with Cucumbers and Herbs,"Paper-thin veggies, a shower of herbs, a pile ...",30 minutes,cornstarch english cucumb peanut brown sugar l...,['Dressing: Blitz everything up in a blender o...
2,https://pinchofyum.com/wp-content/uploads/Goch...,Incredible Gochujang Sauce,"This delightful, creamy, silky, incredible goc...",5 minutes,gochujang sauc clove garlic soy sauc rice vine...,['Mix all ingredients together in a small bowl...
3,https://pinchofyum.com/wp-content/uploads/Air-...,Ridiculously Good Air Fryer Salmon,This air fryer salmon is TOO GOOD. Crisped and...,13 minutes,paprika cornstarch onion powder brown sugar ga...,['Prep the salmon: Remove the skin from your s...
4,https://pinchofyum.com/wp-content/uploads/Two-...,Two Huge Chocolate Chip Cookies,Just two chocolate chip cookies – lightly cris...,15 minutes,allpurpos flour cornstarch chocol chip bake so...,"['Preheat the oven to 350 degrees.', 'Mix butt..."


In [25]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 767 entries, 0 to 766
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   image         767 non-null    object
 1   title         767 non-null    object
 2   description   766 non-null    object
 3   total time    767 non-null    object
 4   ingredients   767 non-null    object
 5   instructions  767 non-null    object
dtypes: object(6)
memory usage: 36.1+ KB


In [26]:
def norm(s):
    s = "" if pd.isna(s) else str(s).lower()
    s = re.sub(r"[^a-z0-9, /+-]", " ", s).replace("/", " ")
    return " ".join([t for t in re.split(r"[,\s]+", s) if t])

In [27]:
df["ing_norm"] = df["ingredients"].astype(str).apply(norm)

In [28]:
vec = TfidfVectorizer(ngram_range=(1, 2), min_df=2)
X = vec.fit_transform(df["ing_norm"])

In [29]:
knn = NearestNeighbors(metric="cosine", algorithm="brute")
knn.fit(X)

In [30]:
def find_recipes_knn(ingredients, k=10, require_all=False):
    q = norm(ingredients)
    q_vec = vec.transform([q])
    dists, idxs = knn.kneighbors(q_vec, n_neighbors=min(k*5, X.shape[0]))
    out = df.iloc[idxs[0]].copy()
    out["score"] = 1 - dists[0]

    if require_all:
        toks = set(q.split())
        out = out[out["ing_norm"].apply(lambda s: toks.issubset(set(s.split())))]

    cols = [c for c in ["title", "total_time", "ingredients", "instructions", "image", "score"] if c in out.columns]
    return out[cols].head(k)

In [31]:
find_recipes_knn("tomato, onion, garlic", k=10, require_all=True)

Unnamed: 0,title,ingredients,instructions,image,score
267,Three Cheese Baked Ziti,crush tomato mascarpon chees tomato sauc ziti ...,['Sauce: Heat olive oil in a large pot over me...,https://pinchofyum.com/wp-content/uploads/Thre...,0.232067
365,Freezer Meal Moroccan-Spiced Chickpeas,chickpea chili powder cumin turmer garam masal...,[],https://pinchofyum.com/wp-content/uploads/Moro...,0.214486
462,Creamy Tomato Risotto with Pan Fried Barramundi,lemon juic parmesan chees arborio rice clove m...,['Heat the chicken broth in a saucepan and kee...,https://pinchofyum.com/wp-content/uploads/Barr...,0.212351
343,No-Boil Baked Penne with Meatballs,crush fire roast tomato onion powder land lake...,"['Preheat oven to 400 degrees.', 'Mix all of t...",https://pinchofyum.com/wp-content/uploads/Bake...,0.194682
292,Greek Baked Orzo,dri oregano butter dice fireroast tomato veget...,"['Preheat the oven to 400 degrees.', 'In a lar...",https://pinchofyum.com/wp-content/uploads/Gree...,0.185875
17,5 Ingredient Tomato Soup,san marzano tomato clove smash garlic yellow o...,"['Make the Tomato Soup: Put the tomatoes, butt...",https://pinchofyum.com/wp-content/uploads/Toma...,0.097926
720,Slow Cooker Creole Chicken and Sausage,dice tomato green bell pepper tomato sauc brow...,['Place the essential ingredients in the crock...,https://pinchofyum.com/wp-content/uploads/Creo...,0.095442
80,Simple Homemade Tomato Soup,cream flour yellow onion dri thyme bay leaf ch...,['Fry the bacon in a large soup pot over mediu...,https://pinchofyum.com/wp-content/uploads/Home...,0.090247
344,Instant Pot Creole Chicken and Sausage,boneless skinless chicken breast green red bel...,"['Prep all your ingredients.', 'Cook everythin...",https://pinchofyum.com/wp-content/uploads/Chic...,0.085194
269,Vegetarian Chili,bean choic fresh mushroom dice green chile cum...,['Make the “meat” – pulse all ingredients in a...,https://pinchofyum.com/wp-content/uploads/Vege...,0.085014
