## Food DB
This notebook demonstrates how to use txtai embeddings to build a searchable food database with rich metadata.
It shows how to index food items with details such as cuisine type, price, location and ingredients,
enabling both semantic and hybrid search.


In [4]:
from txtai import Embeddings
import json

# Sample food dataset with rich metadata.
# Every record has the same two-key layout: a free-text "text" description and
# a nested "metadata" dict, so the list is assembled from (text, metadata) pairs.
foods = [
    {"text": description, "metadata": details}
    for description, details in (
        (
            "Fresh Homemade Italian Pizza Margherita with basil and mozzarella",
            dict(
                cuisine="Italian",
                type="pizza",
                price=15.99,
                location="Little Italy, NYC",
                ingredients=["tomato sauce", "mozzarella", "basil", "olive oil"],
                dietary=["vegetarian"],
            ),
        ),
        (
            "Spicy Thai Red Curry with coconut milk and jasmine rice",
            dict(
                cuisine="Thai",
                type="curry",
                price=12.99,
                location="Bangkok Kitchen, LA",
                ingredients=["coconut milk", "red curry paste", "bamboo shoots", "jasmine rice"],
                dietary=["gluten-free"],
            ),
        ),
    )
]

def create_food_index(data=None, path="food-index"):
    """Build a txtai embeddings index over food records and optionally save it.

    Args:
        data: Iterable of documents handed unchanged to ``Embeddings.index``.
            When omitted, the module-level ``foods`` sample dataset is indexed.
        path: Location passed to ``Embeddings.save``. Defaults to
            ``"food-index"``; pass ``None`` to skip saving.

    Returns:
        The ``Embeddings`` instance after indexing.
    """
    # Create embeddings instance with our standard config
    embeddings = Embeddings({
        "path": "sentence-transformers/nli-mpnet-base-v2",
        # Keep the original records so queries can return text and metadata
        "content": True,
        "backend": "faiss",
        # Two named subindexes: "sparse" and "dense"
        # NOTE(review): txtai documents keyword/BM25 indexing via `keyword: True`
        # or a `scoring` block; confirm that this `bm25` key is actually honored.
        "indexes": {
            "sparse": {
                "bm25": {
                    "terms": True,
                    "normalize": True
                }
            },
            "dense": {}
        },
        "batch": 32,
        "normalize": True
    })

    # Fall back to the sample dataset only when the caller supplied nothing;
    # an explicitly passed empty collection is respected as-is.
    documents = foods if data is None else data

    # Index the data
    embeddings.index(documents)

    # Save the index (skipped when the caller opts out with path=None)
    if path is not None:
        embeddings.save(path)
    return embeddings

In [3]:
def test_searches(embeddings):
    # Test semantic search
    print("\nSemantic Search for 'spicy asian food':\n")
    print(json.dumps(embeddings.search("spicy asian food"), indent=2))

    # Test hybrid search
    print("\nHybrid Search for 'italian vegetarian':\n")
    print(json.dumps(embeddings.search(
        "SELECT text, score, metadata FROM txtai WHERE similar('italian') AND similar('vegetarian')"
    ), indent=2))

# Entry point: build (and save) the food index, then run the demo queries on it.
# The guard holds when this code runs as the main module, e.g. when the
# notebook cell is executed; it is skipped if the code is imported instead.
if __name__ == "__main__":
    # Kept in a module-level name so the index stays available after this runs
    embeddings = create_food_index()
    test_searches(embeddings)


Semantic Search for 'spicy asian food':

[
  {
    "id": "1",
    "text": "Spicy Thai Red Curry with coconut milk and jasmine rice",
    "score": 0.5766427516937256
  },
  {
    "id": "0",
    "text": "Fresh Homemade Italian Pizza Margherita with basil and mozzarella",
    "score": 0.289595365524292
  }
]

Hybrid Search for 'italian vegetarian':

[
  {
    "text": "Fresh Homemade Italian Pizza Margherita with basil and mozzarella",
    "score": 0.28662943840026855,
    "metadata": "{\"cuisine\":\"Italian\",\"type\":\"pizza\",\"price\":15.99,\"location\":\"Little Italy, NYC\",\"ingredients\":[\"tomato sauce\",\"mozzarella\",\"basil\",\"olive oil\"],\"dietary\":[\"vegetarian\"]}"
  },
  {
    "text": "Spicy Thai Red Curry with coconut milk and jasmine rice",
    "score": 0.16934195160865784,
    "metadata": "{\"cuisine\":\"Thai\",\"type\":\"curry\",\"price\":12.99,\"location\":\"Bangkok Kitchen, LA\",\"ingredients\":[\"coconut milk\",\"red curry paste\",\"bamboo shoots\",\"jasmine rice\