In [35]:
import sqlite3
import json
import sys
import os
import torch
import numpy as np

from sentence_transformers import SentenceTransformer, util

In [36]:


# Sentence-embedding model shared by the embedding helpers in this notebook.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Location of the recipes SQLite database. Set the RECIPE_DB_PATH environment
# variable to run the notebook on another machine; when it is unset the
# original local path is used, so existing behaviour is unchanged.
db_path = os.environ.get(
    'RECIPE_DB_PATH',
    'C:/Users/phudi/OneDrive/Desktop/as1_cs235/abc/recipe/adapters/data/database.db',
)


In [37]:
def sample(db_path, limit=5):
    """Read up to ``limit`` recipes from the database and return them as JSON text.

    Args:
        db_path: Path to the SQLite database file. The query reads the
            ``name``, ``ingredients``, ``instructions`` and ``cooking_time``
            columns of a ``recipes`` table.
        limit: Maximum number of rows to fetch. Defaults to 5, which is the
            value that used to be hard-coded in the query.

    Returns:
        A JSON string (``indent=2``) encoding a list of objects with the keys
        ``"Recipe name"``, ``"Ingredients"``, ``"Instructions"`` and
        ``"Cooking time"`` (the stored cooking time followed by " minutes").

    Raises:
        sqlite3.Error: Propagated from the connection or the query.
    """
    conn = sqlite3.connect(db_path)
    try:
        # Bind the limit as a parameter rather than formatting it into the SQL.
        rows = conn.execute(
            "SELECT name, ingredients, instructions, cooking_time FROM recipes LIMIT ?",
            (limit,),
        ).fetchall()
    finally:
        # Always release the database handle, even if the query fails; the
        # previous version left the connection open.
        conn.close()

    records = [
        {
            "Recipe name": name,
            "Ingredients": ingredients,
            "Instructions": instructions,
            "Cooking time": f"{cooking_time} minutes",
        }
        for name, ingredients, instructions, cooking_time in rows
    ]

    return json.dumps(records, indent=2)

In [38]:
def embedding(recipes) -> list:
    """Encode each recipe field with the notebook-level ``model``.

    Args:
        recipes: Sequence of recipe mappings; every item is indexed with the
            keys ``"Recipe name"``, ``"Ingredients"``, ``"Instructions"`` and
            ``"Cooking time"``.

    Returns:
        A one-element list holding a dict that maps ``"Name Field"``,
        ``"Ingredients Field"``, ``"Instructions Field"`` and
        ``"Cooking_time Field"`` to the value returned by
        ``model.encode(..., convert_to_tensor=True)`` for that field's texts.
    """
    # (output label, recipe key) pairs, in the order the fields are encoded.
    field_sources = (
        ("Name Field", "Recipe name"),
        ("Ingredients Field", "Ingredients"),
        ("Instructions Field", "Instructions"),
        ("Cooking_time Field", "Cooking time"),
    )

    # Collect every field's texts first so a missing key fails before any
    # encoding work is started.
    texts_by_label = {
        label: [item[source_key] for item in recipes]
        for label, source_key in field_sources
    }

    encoded_fields = {}
    for label, texts in texts_by_label.items():
        encoded_fields[label] = model.encode(texts, convert_to_tensor=True)

    return [encoded_fields]

In [39]:
def embedding_query(query: str) -> torch.Tensor:
    """Encode a free-text query with the notebook-level ``model``.

    Args:
        query: The search text to embed.

    Returns:
        The value of ``model.encode(query, convert_to_tensor=True)``. The
        return annotation previously said ``list``; ``similarity_scores``
        takes this value as its ``query: torch.Tensor`` argument.
    """
    return model.encode(query, convert_to_tensor=True)

In [40]:
def similarity_scores(data: list, query: torch.Tensor, recipes: list, top_k=3) -> list:
    """Rank recipes by their mean cosine similarity to the query across fields.

    Args:
        data: List whose first element maps field labels to embeddings (the
            structure returned by ``embedding``); only ``data[0]`` is read.
        query: Query embedding compared against every field's embeddings.
        recipes: Recipe dicts; ``len(recipes)`` sets the number of score rows
            and the returned items are copies of these dicts.
        top_k: Number of best-scoring recipes to return.

    Returns:
        Up to ``top_k`` recipe copies, best first, each with an added
        ``"average_similarity"`` entry. The score-matrix shape, the average
        scores and the selected recipes are also printed.
    """
    field_embeddings = data[0]
    # One row per recipe, one column per embedded field.
    score_matrix = np.zeros((len(recipes), len(field_embeddings)))
    print(score_matrix.shape)

    column = 0
    for field_tensor in field_embeddings.values():
        similarities = util.cos_sim(query, field_tensor)
        score_matrix[:, column] = similarities.cpu().numpy().flatten()
        column += 1

    # Every field contributes equally to a recipe's final score.
    avg_scores = score_matrix.mean(axis=1)
    print(f"Average score: {avg_scores}")

    # argsort is ascending, so reverse it before keeping the first top_k.
    best_first = np.argsort(avg_scores)[::-1]
    top_recipes = []
    for position in best_first[:top_k]:
        # Copy so the caller's recipe dicts are not modified.
        ranked = recipes[position].copy()
        ranked["average_similarity"] = avg_scores[position]
        top_recipes.append(ranked)

    print(f"Top Recipes: {top_recipes}")
    return top_recipes

# Quick end-to-end run: load the sample recipes, embed them and the query,
# then rank the recipes against the query.
query = "Which recipes is made from mushroom?"
recipes = json.loads(sample(db_path=db_path))
data_embeddings = embedding(recipes=recipes)
query_embeddings = embedding_query(query=query)
# Left as the final expression so the notebook displays the returned list.
similarity_scores(data=data_embeddings, query=query_embeddings, recipes=recipes)

(5, 4)
Average score: [0.26399369 0.19602754 0.33222242 0.26939606 0.29160116]
Top Recipes: [{'Recipe name': "Carina's Tofu-Vegetable Kebabs", 'Ingredients': 'extra firm tofu, eggplant, zucchini, mushrooms, soy sauce, low sodium soy sauce, olive oil, maple syrup, honey, red wine vinegar, lemon juice, garlic cloves, mustard powder, black pepper', 'Instructions': 'Drain the tofu, carefully squeezing out excess water,  and pat dry with paper towels., Cut tofu into one-inch squares., Set aside.  Cut  eggplant lengthwise in half, then cut each half into approximately three strips., Cut strips crosswise into one-inch cubes., Slice zucchini into half-inch thick  slices., Cut red pepper in half, removing stem and seeds, and cut each half into  one-inch squares., Wipe mushrooms clean with a moist paper towel and remove  stems., Thread tofu and vegetables on to barbecue skewers in alternating color  combinations: For example, first a piece of eggplant, then a slice of tofu, then zucchini, then r

[{'Recipe name': "Carina's Tofu-Vegetable Kebabs",
  'Ingredients': 'extra firm tofu, eggplant, zucchini, mushrooms, soy sauce, low sodium soy sauce, olive oil, maple syrup, honey, red wine vinegar, lemon juice, garlic cloves, mustard powder, black pepper',
  'Instructions': 'Drain the tofu, carefully squeezing out excess water,  and pat dry with paper towels., Cut tofu into one-inch squares., Set aside.  Cut  eggplant lengthwise in half, then cut each half into approximately three strips., Cut strips crosswise into one-inch cubes., Slice zucchini into half-inch thick  slices., Cut red pepper in half, removing stem and seeds, and cut each half into  one-inch squares., Wipe mushrooms clean with a moist paper towel and remove  stems., Thread tofu and vegetables on to barbecue skewers in alternating color  combinations: For example, first a piece of eggplant, then a slice of tofu, then zucchini, then red pepper, baby corn and mushrooms., Continue in this way until  all skewers are full., 

In [41]:
# Load the sample recipes (returned as JSON text) and decode them to dicts.
recipes = sample(db_path=db_path)
query = "Which recipes is made from mushroom?"

recipes = json.loads(recipes)

data_embeddings = embedding(recipes=recipes)
query_embeddings = embedding_query(query=query)

# Print shapes of embeddings in data_embeddings.
# The loop variable must not be called `embedding`: at notebook scope that
# would overwrite the embedding() function, and re-running this cell would
# then fail because a tensor is not callable.
for field_map in data_embeddings:
    for field, field_embedding in field_map.items():
        print(f"Shape of {field}: {field_embedding.shape}")

print(f"Shape of query_embeddings: {query_embeddings.shape}")

# Compute and print top 3 recipes
top_recipes = similarity_scores(data_embeddings, query_embeddings, recipes, top_k=3)
print("Top 3 recipes có similarity cao nhất:")
for recipe in top_recipes:
    print(json.dumps(recipe, indent=2))

Shape of Name Field: torch.Size([5, 384])
Shape of Ingredients Field: torch.Size([5, 384])
Shape of Instructions Field: torch.Size([5, 384])
Shape of Cooking_time Field: torch.Size([5, 384])
Shape of query_embeddings: torch.Size([384])
(5, 4)
Average score: [0.26399369 0.19602754 0.33222242 0.26939606 0.29160116]
Top Recipes: [{'Recipe name': "Carina's Tofu-Vegetable Kebabs", 'Ingredients': 'extra firm tofu, eggplant, zucchini, mushrooms, soy sauce, low sodium soy sauce, olive oil, maple syrup, honey, red wine vinegar, lemon juice, garlic cloves, mustard powder, black pepper', 'Instructions': 'Drain the tofu, carefully squeezing out excess water,  and pat dry with paper towels., Cut tofu into one-inch squares., Set aside.  Cut  eggplant lengthwise in half, then cut each half into approximately three strips., Cut strips crosswise into one-inch cubes., Slice zucchini into half-inch thick  slices., Cut red pepper in half, removing stem and seeds, and cut each half into  one-inch squares.,