In [1]:
import json
from llama_cpp import Llama

In [2]:
# Location of the model file that get_llm() hands to Llama(model_path=...).
# NOTE(review): this is a relative path — presumably resolved against the
# kernel's working directory; confirm before running from another folder.
MODEL_PATH = "models/mistral.gguf"

In [3]:
# Module-level cache for the loaded model; stays None until get_llm() runs.
_llm_instance = None


def get_llm():
    """Return the shared ``Llama`` object, constructing it on the first call.

    The instance is stored in the module-level ``_llm_instance`` so that
    later calls reuse it instead of building a new one.

    Returns:
        The cached ``Llama`` instance created from ``MODEL_PATH``.
    """
    global _llm_instance

    # Already built: hand back the cached object.
    if _llm_instance is not None:
        return _llm_instance

    llama_settings = {
        "model_path": MODEL_PATH,
        "n_ctx": 4096,
        "n_threads": 8,
        "n_gpu_layers": 0,
    }
    _llm_instance = Llama(**llama_settings)
    return _llm_instance

In [4]:
def build_prompt(user_prompt, context):
    """Assemble the instruction prompt sent to the planning model.

    The prompt tells the model to reply with JSON only, lists the value of
    ``context["available_targets"]``, shows the expected schema, and ends
    with the user request wrapped in double quotes.

    Args:
        user_prompt: Request text inserted verbatim into the prompt.
        context: Mapping that must contain an ``"available_targets"`` entry;
            a missing key raises ``KeyError``.

    Returns:
        The rendered prompt with the template's leading and trailing
        whitespace removed.
    """
    # Doubled braces are literal braces in the f-string; single braces interpolate.
    rendered = f"""
You are a spatial planning engine.

Return ONLY valid JSON.
No explanation. No markdown.

Allowed targets: {context["available_targets"]}

Schema:
{{
  "object_type": "chair",
  "object_dims": {{"width": float, "depth": float}},
  "target": {{"name": string or null}},
  "weights": {{"near_target": float, "max_clearance": float, "near_wall": float}},
  "constraints": {{"min_clearance": float, "boundary_margin": float}},
  "wall_pref": "near" | "far" | "neutral"
}}

User request: "{user_prompt}"
"""
    return rendered.strip()

In [5]:
def llm_make_plan_local(user_prompt, context):
    """Ask the local model for a plan and parse the JSON object in its reply.

    The prompt comes from ``build_prompt(user_prompt, context)`` and is sent
    to the model returned by ``get_llm()``.

    Args:
        user_prompt: Request text forwarded to ``build_prompt``.
        context: Planning context forwarded to ``build_prompt``.

    Returns:
        The JSON object (a ``dict``) that starts at the first ``{`` of the
        completion, or ``None`` when the completion has no ``{`` or the text
        from that point is not valid JSON. In both failure cases the
        offending text is printed for inspection.
    """
    llm = get_llm()
    prompt = build_prompt(user_prompt, context)

    out = llm(prompt, max_tokens=350, temperature=0.1)
    text = out["choices"][0]["text"]

    start = text.find("{")
    if start == -1:
        print("LLM OUTPUT (not JSON):\n", text)
        return None

    # Decode exactly one object beginning at the first "{" and ignore whatever
    # follows it. Matching up to the *last* "}" would pull in any braces from
    # text the model emits after the plan and make an otherwise valid object
    # unparseable.
    candidate = text[start:]
    try:
        plan, _ = json.JSONDecoder().raw_decode(candidate)
    except json.JSONDecodeError:
        # Only malformed JSON is handled here; interrupts and unrelated
        # errors propagate instead of being silently swallowed.
        print("Bad JSON:\n", candidate)
        return None
    return plan