using the bert for embedding and FAISS for the nearest neighbour search

In [1]:
!pip install -q sentence-transformers faiss-cpu pandas

In [2]:
import pandas as pd
import faiss
from sentence_transformers import SentenceTransformer
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# all-MIniLM-L6-v2 

In [4]:
model = SentenceTransformer('all-MiniLM-L6-v2')  

Combine the text data that has to be embedded into a single column in data frame and then to list .

In [5]:
df = pd.read_csv("PreProcessedData.csv") 
df['text'] = df['name'] + ". " + df['ingredients'] + ". " + df['instructions']+"."+df['tags']

In [6]:
texts=df['text'].tolist()

In [7]:
embeddings = model.encode(texts, show_progress_bar=True, convert_to_numpy=True)

Batches: 100%|██████████| 110/110 [00:09<00:00, 11.60it/s]


The below cell is to scale the nutritions 

In [8]:
from sklearn.preprocessing import MinMaxScaler

nutrients = df[['calories', 'protein', 'carbohydrates','fiber','fat', 'sodium']].fillna(0)
scaler = MinMaxScaler()
nutrient_scaled = scaler.fit_transform(nutrients)


In [9]:
# combining the text embeddings and the scaled nutritions
combined_embeddings = np.hstack((embeddings, nutrient_scaled))

In [10]:
# The FAISS (Facebook AI Similarity Search) efficient similarity search and clustering of dense vectors
import faiss
dim = combined_embeddings.shape[1] 
index = faiss.IndexFlatL2(dim)
index.add(combined_embeddings)


In [11]:
import pickle 
# Save combined embeddings
np.save("combined_embeddings.npy", combined_embeddings)

# Save scaler
with open("scaler.pkl", "wb") as f:
    pickle.dump(scaler, f)

# Save FAISS index
faiss.write_index(index,"faiss_index.bin")

In [12]:
query_text = "oats"
query_embed = model.encode([query_text])

# example nutrient goals
query_nutrient = scaler.transform([[550, 30, 35, 15, 10, 300]])  # scale same as train

# combine
query_vector = np.hstack((query_embed*2.5, query_nutrient*0.5))
D, I = index.search(query_vector.reshape(1, -1), k=3)




In [13]:
df1=pd.read_csv("Dataset_combined.csv")

In [14]:
for i in I:
    print(df1.iloc[i])

      Unnamed: 0                                      name  \
673          673                         Oats Apple Phirni   
1563        1563  Oats and Orange Rabdi, Diabetic Friendly   
348          348                  chocolate overnight oats   

                                            ingredients  \
673   ['3/4 cup powdered oats', '3/4 cup grated appl...   
1563  ['1/4 cup quick cooking rolled oats', '1/2 cup...   
348   ['1/2 cup coconut milk (nariyal ka doodh)', '1...   

                                           instructions  \
673   Heat the milk in a deep non-stick pan and brin...   
1563  Heat the ghee in a deep non-stick pan, add the...   
348   For healthy chocolate overnight oats To make h...   

                                              nutrition     time serving_size  \
673   {'Energy': '117 cal', 'Protein': '7.6 g', 'Car...  11 Mins   4 servings   
1563  {'Energy': '99 cal', 'Protein': '5.9 g', 'Carb...  17 Mins   4 servings   
348   {'Energy': '409 cal', 'Prote