# Elasticsearch Inference API & Hugging Face

This notebook demonstrates how to use Hugging Face completions along with the Elasticsearch Inference API. This notebook is based on the article [Inference API & and Hugging Face](https://www.elastic.co/search-labs/blog/inference-api-and-hugging-face).

In [None]:
%pip install requests elasticsearch -q

: 

## Installing dependencies and importing packages

In [2]:
import os
import json

from elasticsearch import Elasticsearch, helpers
from getpass import getpass

## Setting up environment variables

In [None]:
os.environ["HUGGING_FACE_INFERENCE_ENDPOINT_URL"] = getpass(
    "Enter your Hugging Face Inference Endpoint URL: "
)
os.environ["ELASTICSEARCH_API_KEY"] = getpass("Enter your Elasticsearch API key: ")
os.environ["ELASTICSEARCH_URL"] = getpass("Enter your Elasticsearch URL: ")
os.environ["HUGGING_FACE_API_KEY"] = getpass("Enter your Hugging Face API key: ")


INDEX_NAME = "health_inspections"
INFERENCE_ENDPOINT_ID = "hf_endpoint"
CHAT_INFERENCE_ENDPOINT_ID = "hf_chat_endpoint"

## Elasticsearch Python client

In [5]:
es_client = Elasticsearch(
    os.environ["ELASTICSEARCH_URL"], api_key=os.environ["ELASTICSEARCH_API_KEY"]
)

## Hugging Face completions inference endpoint setup

In [None]:
try:
    resp = es_client.inference.put(
        task_type="chat_completion",
        inference_id=INFERENCE_ENDPOINT_ID,
        body={
            "service": "hugging_face",
            "service_settings": {
                "api_key": os.environ["HUGGING_FACE_API_KEY"],
                "url": os.environ["HUGGING_FACE_INFERENCE_ENDPOINT_URL"],
            },
        },
    )

    print("Chat completion inference endpoint created successfully:")
    print(json.dumps(resp, indent=2))
except Exception as e:
    print(
        "Error creating chat completion inference endpoint:", type(e).__name__, str(e)
    )

## Index setup

### Creating mappings


In [10]:
try:
    mapping = {
        "mappings": {
            "properties": {
                "id": {"type": "keyword"},
                "title": {
                    "type": "text",
                    "fields": {"keyword": {"type": "keyword"}},
                    "copy_to": "semantic_field",
                },
                "category": {"type": "keyword", "copy_to": "semantic_field"},
                "content": {
                    "type": "text",
                    "copy_to": "semantic_field",
                },
                "keywords": {"type": "keyword", "copy_to": "semantic_field"},
                "violation_type": {"type": "keyword", "copy_to": "semantic_field"},
                "inspection_priority": {"type": "keyword", "copy_to": "semantic_field"},
                "semantic_field": {"type": "semantic_text"},
            }
        }
    }

    es_client.indices.create(index=INDEX_NAME, body=mapping)
    print(f"Index {INDEX_NAME} created successfully")
except Exception as e:
    print(f"Error creating index: {e}")

Index health_inspections created successfully


### Ingesting data to Elasticsearch

In [11]:
def build_data(json_file, index_name):
    with open(json_file, "r") as f:
        data = json.load(f)

    for doc in data:
        yield {"_index": index_name, "_source": doc}


try:
    success, errors = helpers.bulk(es_client, build_data("dataset.json", INDEX_NAME))
    print(f"{success} documents indexed successfully")

    if errors:
        print("Errors during indexing:", errors)
except Exception as e:
    print(f"Error: {str(e)}")

20 documents indexed successfully


## Function to execute semantic search

In [21]:
def semantic_search(user_question: str, size: int = 5):
    try:
        response = es_client.search(
            index=INDEX_NAME,
            body={
                "query": {
                    "semantic": {
                        "field": "semantic_field",
                        "query": user_question,
                    }
                },
                "size": size,
            },
        )

        return {
            "hits": response["hits"]["hits"],
            "total_hits": response["hits"]["total"]["value"],
        }

    except Exception as e:
        print(f"Error searching index: {str(e)}")

In [None]:
results = semantic_search(user_question="temperature control food safety")

print(
    f"Total hits: {results['total_hits']}\nDocuments: {json.dumps(results["hits"], indent=2)}"
)

Total hits: 20
Documents: [
  {
    "_index": "health_inspections",
    "_id": "acEpWpoB-ecAfF0xb9Ij",
    "_score": 16.283606,
    "_source": {
      "id": "doc_001",
      "title": "Critical Violations: Temperature Control Requirements",
      "category": "Critical Violations",
      "content": "Temperature control is the most common cause of foodborne illness outbreaks. Cold food must be held at 41\u00b0F (5\u00b0C) or below, and hot food at 135\u00b0F (57\u00b0C) or above. The danger zone between 41\u00b0F and 135\u00b0F allows rapid bacterial growth. Refrigeration equipment must maintain proper temperatures consistently, with backup thermometers in all units. Walk-in coolers require daily temperature logs. Frozen food must be kept at 0\u00b0F (-18\u00b0C) or below. Temperature violations are considered critical and require immediate correction. During inspections, health inspectors will check multiple food items with calibrated thermometers. If potentially hazardous foods (meat, d

## Generating completions function

In [None]:
def stream_chat_completion(messages: list):
    try:
        response = es_client.inference.stream_chat_completion(
            inference_id=INFERENCE_ENDPOINT_ID, messages=messages
        )

        # Extracting chunk content
        for chunk in response:
            if "choices" in chunk and len(chunk["choices"]) > 0:
                choice = chunk["choices"][0]

                if "delta" in choice and "content" in choice["delta"]:
                    content = choice["delta"]["content"]

                    if content:
                        yield content

    except Exception as e:
        yield f"\n\nError en streaming: {str(e)}"

## RAG Chat with Streaming


In [None]:
def rag_chat(user_question: str):
    context_docs = semantic_search(user_question)

    context_text = ""
    if context_docs:
        context_text = "\n\nContext information:\n"
        for i, doc in enumerate(context_docs, 1):
            context_text += f"\n{i}. {doc['_source']}\n"

    system_prompt = """
        You are a helpful assistant that answers questions about restaurant health inspections, compliance, and food safety. 
        Use the provided context information to respond accurately and clearly. 

        Do not rely on your general knowledge to complement the answer. 
    """

    user_prompt = user_question
    if context_text:
        user_prompt = f"{context_text}\n\nQuestion: {user_question}"

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    full_response = ""
    for chunk in stream_chat_completion(messages):
        print(chunk, end="", flush=True)
        full_response += chunk

    return full_response

In [None]:
response = rag_chat("What are the temperature requirements for storing cold food?")

## Deleting

Delete the resources used to prevent them from consuming resources.

In [None]:
# Cleanup - Delete Index
es_client.indices.delete(index=INDEX_NAME)
# TODO: delete inference

ObjectApiResponse({'acknowledged': True})