In [1]:
import os

from elasticsearch import Elasticsearch

# Connection settings are read from the environment so that real credentials
# never have to be typed into (and saved with) the notebook. The fallbacks are
# the local-development values this notebook was originally written with.
ES_URL = os.environ.get("ES_URL", "http://localhost:9200")  # plain HTTP, not HTTPS
ES_USERNAME = os.environ.get("ES_USERNAME", "elastic")
ES_PASSWORD = os.environ.get("ES_PASSWORD", "pass")

# Create Elasticsearch client
es = Elasticsearch(ES_URL, basic_auth=(ES_USERNAME, ES_PASSWORD))

# Test connection: report the outcome instead of raising, so the cell always
# finishes and the reader sees a clear status line.
try:
    if es.ping():
        print("Successfully connected to Elasticsearch")
        print(es.info())
    else:
        print("Could not connect to Elasticsearch")
except Exception as e:
    print(f"Connection failed: {e}")

Successfully connected to Elasticsearch
{'name': '6263b01a17a1', 'cluster_name': 'docker-cluster', 'cluster_uuid': 'tFhb53A1Rw2c0dBvX5pkpA', 'version': {'number': '8.17.0', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '2b6a7fed44faa321997703718f07ee0420804b41', 'build_date': '2024-12-11T12:08:05.663969764Z', 'build_snapshot': False, 'lucene_version': '9.12.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'}


In [8]:
# Length of every stored embedding vector; it has to match the output size of
# the embedding model that produces the vectors.
# NOTE(review): the original comment assumed OpenAI embeddings - confirm the
# model in use really emits 768-dimensional vectors.
_EMBEDDING_DIMS = 768


def _recipe_fields():
    """Return a fresh dict of the fields shared by "recipes" and "recipe_adds"."""
    return {
        "id": {"type": "integer"},
        "title": {"type": "text"},
        "ingredients": {"type": "text"},
        "instructions": {"type": "text"},
        "prep_time": {"type": "integer"},
        "cook_time": {"type": "integer"},
        "cuisine": {"type": "keyword"},
        "course": {"type": "keyword"},
        "diet": {"type": "keyword"},
        # URLs are matched exactly, never analysed, hence keyword.
        "image": {"type": "keyword"},
        "url": {"type": "keyword"},
    }


# Index name -> create-index definition for every index this notebook manages.
MAPPINGS = {
    "users": {
        "mappings": {
            "properties": {
                "email": {"type": "keyword"},
                "name": {"type": "text"},
                # NOTE(review): presumably a password hash - confirm that
                # plaintext passwords are never indexed.
                "password": {"type": "keyword"},
                "embedding": {"type": "dense_vector", "dims": _EMBEDDING_DIMS},
            }
        }
    },
    "recipes": {
        "mappings": {
            "properties": {
                **_recipe_fields(),
                # The only vector field that names its similarity explicitly.
                "embedding": {
                    "type": "dense_vector",
                    "dims": _EMBEDDING_DIMS,
                    "similarity": "cosine",
                },
            }
        }
    },
    "feedback": {
        "mappings": {
            "properties": {
                "email": {"type": "keyword"},
                "input_description": {"type": "text"},
                "input_image": {"type": "keyword"},  # URL, so keyword
                "recipe_id": {"type": "integer"},
                "rating": {"type": "integer"},
                "comment": {"type": "text"},
            }
        }
    },
    "user_reviews": {
        "mappings": {
            "properties": {
                "email": {"type": "keyword"},
                "reviews": {"type": "text"},
            }
        }
    },
    # Same shape as "recipes" plus an "accepted" flag.
    "recipe_adds": {
        "mappings": {
            "properties": {
                **_recipe_fields(),
                "embedding": {"type": "dense_vector", "dims": _EMBEDDING_DIMS},
                "accepted": {"type": "boolean"},
            }
        }
    },
}

In [12]:
# Requires the `es` client and the MAPPINGS dict defined in the cells above.
def create_indices(es_client, index_definitions=None):
    """Create every index in ``index_definitions`` that does not exist yet.

    Args:
        es_client: Synchronous Elasticsearch client; ``indices.exists`` and
            ``indices.create`` are called directly, without ``await``.
        index_definitions: Optional dict of index name -> create-index
            definition (for example ``{"mappings": {...}}``). Defaults to the
            module-level ``MAPPINGS``.

    Returns:
        None. Progress and failures are reported with ``print``; a failure on
        one index does not stop the remaining ones from being processed.
    """
    if index_definitions is None:
        index_definitions = MAPPINGS

    for index_name, definition in index_definitions.items():
        try:
            if es_client.indices.exists(index=index_name):
                print(f"Index '{index_name}' already exists")
                continue

            print(f"Creating index '{index_name}'...")
            # Hand each top-level section (mappings, settings, aliases) to the
            # client as its own keyword. The catch-all `body=` argument was
            # deprecated in early 8.x clients in favour of these keywords.
            es_client.indices.create(index=index_name, **definition)
            print(f"Successfully created index '{index_name}'")
        except Exception as e:
            # Best-effort: report and carry on with the next index.
            print(f"Error creating index '{index_name}': {e}")


# Create all indices (existing ones are skipped; errors are printed, not raised)
create_indices(es)

Index 'users' already exists
Index 'recipes' already exists
Index 'feedback' already exists
Index 'user_reviews' already exists
Index 'recipe_adds' already exists


In [10]:
def delete_indices(es_client):
    """Remove every index named in MAPPINGS, printing the outcome per index.

    Args:
        es_client: Synchronous Elasticsearch client; ``indices.exists`` and
            ``indices.delete`` are called directly, without ``await``.

    An index that is absent is reported and skipped. Any exception raised for
    one index is printed and the loop moves on to the next index.
    """
    for index_name in MAPPINGS:
        try:
            # Guard clause: nothing to delete when the index is absent.
            if not es_client.indices.exists(index=index_name):
                print(f"Index '{index_name}' does not exist")
                continue

            print(f"Deleting index '{index_name}'...")
            es_client.indices.delete(index=index_name)
            print(f"Successfully deleted index '{index_name}'")
        except Exception as err:
            print(f"Error deleting index '{index_name}': {err}")


# Delete all indices
# WARNING: destructive - removes every index named in MAPPINGS together with
# its documents.
# NOTE(review): this cell sits below the create_indices cell, although its
# execution count (10) is lower than that cell's (12). A top-to-bottom run
# therefore deletes the freshly created indices - confirm the intended order.
delete_indices(es)

Deleting index 'users'...
Successfully deleted index 'users'
Deleting index 'recipes'...
Successfully deleted index 'recipes'
Deleting index 'feedback'...
Successfully deleted index 'feedback'
Deleting index 'user_reviews'...
Successfully deleted index 'user_reviews'
Deleting index 'recipe_adds'...
Successfully deleted index 'recipe_adds'


In [13]:
def check_index_stats(es_client, index_name="recipes"):
    """Report whether ``index_name`` exists and holds at least one document.

    Args:
        es_client: Elasticsearch client. It is used synchronously: the results
            of ``indices.exists`` and ``count`` are consumed directly, without
            ``await``.
        index_name: Name of the index to inspect.

    Returns:
        bool: True when the index exists and its document count is above
        zero. False when the index is missing, is empty, or any exception is
        raised while checking (the exception is printed, not re-raised).
    """
    try:
        index_present = es_client.indices.exists(index=index_name)
        if index_present:
            # The count API response carries the total under the "count" key.
            total = es_client.count(index=index_name)["count"]
            print(f"Index '{index_name}' contains {total} documents")
            return total > 0

        print(f"Index '{index_name}' does not exist!")
        return False
    except Exception as err:
        print(f"Error checking index: {err}")
        return False


# Usage example:
has_documents = check_index_stats(es)
if not has_documents:
    # False covers three situations (index missing, zero documents, or the
    # request failed). check_index_stats has already printed which one
    # applies, so do not claim the index is simply "empty".
    print(
        "No documents available (index missing, empty, or the check failed) - "
        "see the message above. You may need to index some documents first."
    )

Error checking index: ApiError(503, 'search_phase_execution_exception', None)
Index is empty! You may need to index some documents first.


In [14]:
import pandas as pd

# Load the recipe dataset; the path is relative to the notebook's working directory.
# NOTE(review): the preview below shows an "Unnamed: 0" column (presumably an
# index saved with the CSV) and an "embedding" column that looks like a
# stringified nested list - confirm both before indexing these rows.
df = pd.read_csv("./data.csv")
df.head()

Unnamed: 0,id,title,ingredients,instructions,prep_time,cook_time,cuisine,course,diet,image,url,embedding
0,4529,lavand-e-murgh recipe - afghani chicken in yog...,['fresh pomegranate fruit kernels few garnish'...,"['to begin making the lavand-e-murgh recipe, w...",15,25,Afghan,Dinner,High Protein Non Vegetarian,,https://www.archanaskitchen.com/lavand-e-murgh...,"[[-0.0026710997335612774, 0.003612738568335771..."
1,4640,afghani dhoog recipe - cucumber mint buttermil...,"['cumin powder jeera', 'curd dahi yogurt', 'sa...",['to begin making the afghani dhoog recipe - c...,10,0,Afghan,Snack,Vegetarian,,http://www.archanaskitchen.com/doogh-afghani-y...,"[[-0.014779524877667427, -0.008534302935004234..."
2,5978,malida recipe (healthy whole wheat afghan sweet),"['cardamom powder elaichi', 'dates pitted fine...","['to begin making the malida recipe, tear the ...",20,20,Afghan,Snack,Vegetarian,,https://www.archanaskitchen.com/malida-recipe-...,"[[-0.01772255077958107, -0.019701037555933, -0..."
3,7092,moroccan spiced millet and lentil salad recipe,"['tomato chopped', 'extra virgin olive oil', '...",['to begin making the moroccan spiced millet a...,10,20,African,Dinner,Vegetarian,,https://www.archanaskitchen.com/moroccan-spice...,"[[-0.06342744827270508, -0.01326711568981409, ..."
4,6684,chickpea & date tagine recipe,"['onion', 'cumin powder jeera', 'extra virgin ...",['to begin making the chickpea & date tagine r...,15,60,African,Dinner,High Protein Vegetarian,,https://www.archanaskitchen.com/chickpea-date-...,"[[-0.03216571733355522, 0.029672250151634216, ..."


In [23]:
from elasticsearch import Elasticsearch
from typing import Dict, Any
import sys

sys.path.append("../api/")
from models import Recipe

# ... existing code ...


def index_recipe_to_elastic(
    recipe: Recipe, es_client: Elasticsearch, index_name: str = "recipes"
) -> None:
    """Write one recipe into Elasticsearch, keyed by the recipe's id.

    Args:
        recipe: Recipe model instance whose attributes are copied into the
            document.
        es_client: Elasticsearch client instance used for the ``index`` call.
        index_name: Target index (default: "recipes").

    The document id is ``str(recipe.id)``. A confirmation line is printed
    after the ``index`` call returns.
    """
    # Attributes copied onto the document unchanged, in document order.
    plain_fields = (
        "id",
        "title",
        "ingredients",
        "instructions",
        "prep_time",
        "cook_time",
        "cuisine",
        "course",
        "diet",
    )
    doc = {field: getattr(recipe, field) for field in plain_fields}

    # Link-like attributes are stringified; falsy values are stored as None.
    for link_field in ("image", "url"):
        link = getattr(recipe, link_field)
        doc[link_field] = str(link) if link else None

    doc["embedding"] = recipe.embedding

    es_client.index(index=index_name, id=str(recipe.id), document=doc)
    print(f"Indexed recipe {recipe.id} to Elasticsearch")

In [24]:
import ast
from types import SimpleNamespace


def _row_to_recipe(row):
    """Adapt one ``df.itertuples()`` row to the attribute-style object that
    ``index_recipe_to_elastic`` reads.

    Two CSV artefacts are repaired on the way:
    * missing cells arrive as NaN, which is truthy and would be indexed as the
      string "nan" by the ``str(x) if x else None`` checks - they become None;
    * ``embedding`` arrives as text, which a dense_vector field cannot accept -
      it is parsed back into a list of numbers.
    """
    cleaned = {
        name: (None if pd.isna(value) else value)
        for name, value in row._asdict().items()
    }

    embedding = cleaned.get("embedding")
    if isinstance(embedding, str):
        embedding = ast.literal_eval(embedding)
    # NOTE(review): the df.head() preview shows embeddings serialised as a
    # nested list ("[[...]]"); presumably a single inner vector per recipe -
    # confirm. Only that first inner vector is kept.
    if embedding and isinstance(embedding[0], (list, tuple)):
        embedding = list(embedding[0])
    cleaned["embedding"] = embedding

    return SimpleNamespace(**cleaned)


# An explicit loop instead of df.apply(..., axis=1): indexing is a side effect,
# and apply would build (and display) a Series holding one None per row.
for row in df.itertuples(index=False):
    index_recipe_to_elastic(_row_to_recipe(row), es)

ConnectionTimeout: Connection timed out