In [1]:
import requests
import json

# Cell purpose: make sure the 'docs-elser' index exists. If it already does,
# print its mapping; otherwise create it with a text field plus a sparse_vector
# field (ml.tokens) and report the outcome.

ES_URL = "http://localhost:9200"
# NOTE(review): these look like local-dev defaults; avoid committing real
# credentials — prefer environment variables or getpass for shared notebooks.
ES_USER = "elastic"
ES_PASS = "changeme"

auth = (ES_USER, ES_PASS)

INDEX = "docs-elser"
TIMEOUT_S = 60  # seconds; without a timeout, requests can block indefinitely

# Check if index exists. Only 200 (present) and 404 (absent) are treated as
# answers; anything else (401, 403, 5xx, ...) is an error, not "index missing".
resp = requests.head(f"{ES_URL}/{INDEX}", auth=auth, timeout=TIMEOUT_S)
if resp.status_code == 200:
    print(f"Index '{INDEX}' already exists. Current mapping:\n")
    mapping_resp = requests.get(f"{ES_URL}/{INDEX}/_mapping", auth=auth, timeout=TIMEOUT_S)
    mapping_resp.raise_for_status()  # don't pretty-print an error body as if it were a mapping
    mapping = mapping_resp.json()
    print(json.dumps(mapping, indent=2))
elif resp.status_code == 404:
    print(f"Creating index '{INDEX}' with sparse_vector mapping...\n")

    index_body = {
        "settings": {
            "number_of_shards": 1,
            "number_of_replicas": 0
        },
        "mappings": {
            "properties": {
                "text": { "type": "text" },       # raw document text
                "ml": {
                    "properties": {
                        "tokens": {               # sparse vector in ELSER format
                            "type": "sparse_vector"
                        }
                    }
                }
            }
        }
    }

    resp = requests.put(
        f"{ES_URL}/{INDEX}",
        auth=auth,
        json=index_body,
        timeout=TIMEOUT_S
    )

    if resp.status_code in (200, 201):
        print(f"✅ Index '{INDEX}' created successfully.")
    else:
        print("❌ Error creating index:\n", resp.text)
else:
    # Previously any non-200 status fell through to index creation, which hid
    # the real problem (e.g. bad credentials) behind a second failing request.
    raise RuntimeError(
        f"Unexpected status {resp.status_code} while checking index '{INDEX}': {resp.text}"
    )


Creating index 'docs-elser' with sparse_vector mapping...

✅ Index 'docs-elser' created successfully.


In [2]:
import requests, json, uuid

# Cell purpose: embed one piece of text through the sparse-embedding inference
# endpoint, index it together with its token weights, then read it back.

ES_URL  = "http://localhost:9200"
ES_USER = "elastic"
ES_PASS = "changeme"
auth    = (ES_USER, ES_PASS)

INDEX   = "docs-elser"          # assumes you've created this (text + ml.tokens sparse_vector)
EP_ID   = "elser-local"         # your working endpoint name

# ---- 1) choose a doc to index ----
text = "hello world from addis ababa"

# ---- 2) get sparse embedding from inference endpoint ----
infer_resp = requests.post(
    f"{ES_URL}/_inference/sparse_embedding/{EP_ID}",
    auth=auth,
    json={"input": text},
    timeout=60
)
infer_resp.raise_for_status()
data = infer_resp.json()

# Response shape → pick the first embedding dict
# Looks like: { "sparse_embedding": [ { "is_truncated": false, "embedding": { "hello": 2.09, ... } } ] }
embedding = data["sparse_embedding"][0]["embedding"]

# ---- 3) index the doc (text + sparse vector) ----
doc = {
    "text": text,
    "ml": {
        "tokens": embedding
    }
}

doc_id = str(uuid.uuid4())
idx_resp = requests.put(f"{ES_URL}/{INDEX}/_doc/{doc_id}", auth=auth, json=doc, timeout=60)
print("INDEX RESPONSE:", idx_resp.status_code, idx_resp.text)
# Print first (so the error body is visible), then stop: fetching a document
# that was never written would only produce a misleading "not found" dump.
idx_resp.raise_for_status()

# ---- 4) fetch back to confirm ----
get_resp = requests.get(f"{ES_URL}/{INDEX}/_doc/{doc_id}", auth=auth, timeout=60)
get_resp.raise_for_status()  # surface a failed read instead of printing its error body as the doc
print("\nFETCHED DOC:")
# Cap the dump at 2000 characters; the token map can be long.
print(json.dumps(get_resp.json(), indent=2)[:2000])

INDEX RESPONSE: 201 {"_index":"docs-elser","_id":"ac219f25-7105-40dd-8ea7-652f1c4343d9","_version":1,"result":"created","_shards":{"total":1,"successful":1,"failed":0},"_seq_no":0,"_primary_term":1}

FETCHED DOC:
{
  "_index": "docs-elser",
  "_id": "ac219f25-7105-40dd-8ea7-652f1c4343d9",
  "_version": 1,
  "_seq_no": 0,
  "_primary_term": 1,
  "found": true,
  "_source": {
    "text": "hello world from addis ababa",
    "ml": {
      "tokens": {
        "hello": 2.0931942,
        "world": 2.0573184,
        "aba": 1.6207728,
        "add": 1.284134,
        "##ba": 1.2064378,
        "nigeria": 1.0181541,
        "africa": 0.97434705,
        "##is": 0.9174954,
        "welcome": 0.7998751,
        "restaurant": 0.78125226,
        "festival": 0.7610713,
        "brazil": 0.63065684,
        "madagascar": 0.6174381,
        "flight": 0.52544105,
        "dj": 0.5146611,
        "life": 0.48759228,
        "museum": 0.4841117,
        "airport": 0.45888293,
        "island": 0.4568475

In [3]:
import requests, json

# Cell purpose: run a sparse_vector search against ml.tokens, letting the
# inference endpoint expand the query text, and list each hit with its score.

ES_URL, ES_USER, ES_PASS = "http://localhost:9200", "elastic", "changeme"
auth = (ES_USER, ES_PASS)

INDEX, EP_ID = "docs-elser", "elser-local"

query_text = "greetings from ethiopia"   # swap in any text you like

# ES 9.x sparse search: the sparse_vector query is given the inference endpoint
# id and the raw query text instead of precomputed token weights.
sparse_clause = {
    "field": "ml.tokens",
    "inference_id": EP_ID,
    "query": query_text,
}
body = {"query": {"sparse_vector": sparse_clause}}

search_url = f"{ES_URL}/{INDEX}/_search"
resp = requests.post(search_url, auth=auth, json=body, timeout=60)
resp.raise_for_status()
data = resp.json()

# Tolerate a response without a "hits" section: report None / no rows.
hits_section = data.get("hits", {})
print("Total hits:", hits_section.get("total"))

rank = 0
for hit in hits_section.get("hits", []):
    rank += 1  # 1-based position in the result list
    header = f"\nHit {rank} | _id={hit['_id']} | _score={hit['_score']:.4f}"
    print(header)
    print(hit["_source"]["text"])


Total hits: {'value': 1, 'relation': 'eq'}

Hit 1 | _id=ac219f25-7105-40dd-8ea7-652f1c4343d9 | _score=6.2818
hello world from addis ababa
