In [None]:
# === 4b) Verify server version and whether ELSER model is already present/usable ===
from elasticsearch import Elasticsearch

# Dedicated client used only for the verification steps in this cell.
# ES_URL / ES_USER / ES_PASS are not defined in this cell and must already exist.
# NOTE(review): verify_certs=False turns off TLS certificate verification for this
# client -- confirm it is only ever pointed at a trusted/dev cluster.
es_check = Elasticsearch(
    ES_URL,
    basic_auth=(ES_USER, ES_PASS),
    verify_certs=False,
    request_timeout=60
)

def _parse_major(ver: str) -> int | None:
    try:
        return int(str(ver).split(".")[0])
    except Exception:
        return None

# 1) Server version check
# info() doubles as the connectivity probe: any exception it raises stops the run
# with a SystemExit carrying the target URL and the underlying error text.
try:
    server_info = es_check.info()
except Exception as e:
    raise SystemExit(f"[FATAL] Could not connect to Elasticsearch at {ES_URL}: {e}")

# Read "version.number" defensively; server_ver is None when the key is absent.
server_ver = server_info.get("version", {}).get("number")
major = _parse_major(server_ver)
print(f"[INFO] Server version: {server_ver}")
# major is None when the version could not be parsed, which also triggers this warning.
if major != 9:
    print("[WARN] Server is not 9.x. Pin your Python client accordingly "
          "(e.g., elasticsearch>=8,<9 for 8.x; >=9,<10 for 9.x).")

# 2) Model presence / allocation
def get_model_status(es, model_id: str):
    """
    Look up whether a trained model is known to the cluster and how it is allocated.

    Args:
        es: Elasticsearch client; only ``es.ml.get_trained_models_stats`` is called.
        model_id: ID of the trained model to inspect.

    Returns: (present: bool, allocation_state: str|None)
    present=True if the cluster knows the model (stats entry exists).
    allocation_state is 'started'/'fully_allocated' when deployed for inference,
    and None when the first stats entry carries no deployment/allocation info.

    Never raises: any error while fetching or reading the stats is printed as a
    warning and reported as (False, None).
    """
    try:
        stats = es.ml.get_trained_models_stats(model_id=model_id)
        items = stats.get("trained_model_stats") or []
        if not items:
            return (False, None)
        entry = items[0]
        # Each level may be missing *or* explicitly null; `or {}` covers both so a
        # present-but-undeployed model is not misreported as absent.
        deployment = entry.get("deployment_stats") or {}
        allocation = deployment.get("allocation_status") or {}
        return (True, allocation.get("state"))
    except Exception as e:
        # Deliberate best-effort: callers treat (False, None) as "do not use the model".
        print(f"[WARN] Could not read model stats for {model_id!r}: {e}")
        return (False, None)

# Query the cluster once; the resulting flags drive every decision below.
# MODEL_ID is not defined in this cell and must already exist.
present, alloc_state = get_model_status(es_check, MODEL_ID)
print(f"[INFO] Model present: {present} | allocation: {alloc_state}")

# 3) Enforce "use only what already exists"
# When True, a missing or unallocated model forces BM25-only mode further down.
REQUIRE_EXISTING_MODEL = True   # keep True per your constraint

def _force_bm25(reason: str):
    global BM25_ONLY
    print(f"[WARN] {reason} -> forcing BM25_ONLY=True to avoid downloads/allocations.")
    BM25_ONLY = True

# With the constraint enabled, a missing model or one whose allocation state is
# not 'started'/'fully_allocated' switches the run to BM25-only.
if REQUIRE_EXISTING_MODEL and not present:
    _force_bm25("Model not present in this cluster")
elif REQUIRE_EXISTING_MODEL and alloc_state not in ("started", "fully_allocated"):
    _force_bm25("Model present but not allocated")

# 4) Optional: tiny infer() smoke test (only if we think hybrid is allowed)
# Skipped when BM25_ONLY is already set, or the model is missing / not allocated.
CAN_TRY_INFER = (not BM25_ONLY) and present and (alloc_state in ("started", "fully_allocated"))
if CAN_TRY_INFER:
    try:
        # Minimal text_expansion infer call (no ingest, no indexing)
        # NOTE(review): "text_field" is presumably the input field the model expects -- confirm.
        resp = es_check.ml.infer_trained_model(
            model_id=MODEL_ID,
            body={
                "docs": [{"text_field": "hello world"}],
                "inference_config": {"text_expansion": {}}
            },
            timeout="15s"
        )
        # If we got here without exception, the model is truly usable
        # (the content of resp is not inspected here).
        print("[INFO] infer() smoke test: OK (model is responding).")
    except Exception as e:
        # Any failure downgrades to BM25-only instead of aborting the run.
        _force_bm25(f"infer() smoke test failed: {e}")

# Report the search mode that resulted from the checks above.
print(
    "[INFO] BM25-only mode is active."
    if BM25_ONLY
    else "[INFO] Hybrid search (ELSER + BM25) is allowed."
)
