<a href="https://colab.research.google.com/github/Besuf/Llava-colab/blob/main/Moondream_Olama.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Refresh the apt package index, install pciutils, then run the Ollama install script.
!sudo apt update
# NOTE(review): pciutils provides lspci; presumably the Ollama installer uses it
# to detect a GPU - confirm against the install script.
!sudo apt install -y pciutils
# Download the install script and pipe it straight into sh
# (-f: fail on HTTP errors, -sS: quiet but still show errors, -L: follow redirects).
!curl -fsSL https://ollama.com/install.sh | sh

In [None]:
# Start/restart Ollama and wait for port 11434 to be ready
# Stop any "ollama serve" process left over from an earlier run; `|| true` keeps
# the cell from failing when no such process exists.
!pkill -f "ollama serve" || true
# Environment for the server process started below.
# NOTE(review): presumably OLLAMA_NUM_GPU=1 requests GPU use and
# OLLAMA_KEEP_ALIVE=30m keeps a loaded model resident for 30 minutes -
# confirm both against the Ollama documentation.
%env OLLAMA_NUM_GPU=1
%env OLLAMA_KEEP_ALIVE=30m
# Launch the server in the background; stdout and stderr both go to /content/ollama.log.
!nohup ollama serve > /content/ollama.log 2>&1 &

import time, requests, os

BASE = "http://127.0.0.1:11434"

def wait_for_ollama(timeout=120):
    """Poll the Ollama version endpoint until it answers or *timeout* elapses.

    Sends ``GET {BASE}/api/version`` roughly once per second. On the first
    successful response the decoded JSON body is printed and ``True`` is
    returned. If no successful response arrives in time, the last 80 lines
    of ``/content/ollama.log`` are printed and ``False`` is returned.

    Args:
        timeout: Maximum number of seconds to keep polling.

    Returns:
        ``True`` once the server responded successfully, ``False`` on timeout.
    """
    # A monotonic clock keeps the deadline correct even if the wall clock is
    # adjusted while we wait.
    started = time.monotonic()
    while time.monotonic() - started < timeout:
        try:
            r = requests.get(f"{BASE}/api/version", timeout=2)
            if r.ok:
                print("Ollama up:", r.json())
                return True
        except (requests.RequestException, ValueError):
            # Connection refused / request timeout, or a body that is not valid
            # JSON: treat as "not ready yet" and retry. Anything else (for
            # example a programming error) is allowed to propagate.
            pass
        time.sleep(1)
    print("Timeout waiting for Ollama. Last logs:")
    os.system("tail -n 80 /content/ollama.log")
    return False

# Stop this cell with an AssertionError if the server never answered within the timeout.
assert wait_for_ollama(), "Ollama server did not start"

In [None]:
# Pull the moondream:1.8b model that the cells below send requests to.
!ollama pull moondream:1.8b

In [None]:
import requests, json

BASE = "http://127.0.0.1:11434"

# Smoke test: verify the server answers, the model is listed, and a text-only
# chat request returns non-empty content.

# 1) Confirm server responding
ver = requests.get(f"{BASE}/api/version", timeout=5)
ver.raise_for_status()

# 2) Confirm model tag present
tags_resp = requests.get(f"{BASE}/api/tags", timeout=10)
# Surface HTTP errors here, like the other two requests do, instead of failing
# later on an unexpected body.
tags_resp.raise_for_status()
tags = tags_resp.json()
assert any(m.get("name","").startswith("moondream:1.8b") for m in tags.get("models", [])), "moondream:1.8b not found in tags"

# 3) Simple chat test (text only)
payload = {
    "model": "moondream:1.8b",
    "messages": [{"role": "user", "content": "Say: ready."}],
    "stream": False
}
resp = requests.post(f"{BASE}/api/chat", json=payload, timeout=180)
resp.raise_for_status()

# Handle single JSON or accidental NDJSON
try:
    data = resp.json()
except ValueError:
    # The body is not one JSON document; parse it line by line instead.
    chunks = []
    for line in resp.text.splitlines():
        if not line.strip():
            continue
        try:
            obj = json.loads(line)
        except ValueError:
            # Skip lines that are not JSON rather than aborting the whole check.
            continue
        if isinstance(obj, dict):
            chunks.append(obj)
    assert chunks, "No parseable JSON in response"
    # In a streamed reply every chunk carries a fragment of the message and the
    # closing chunk usually has empty content, so join all fragments instead of
    # reading only the last object.
    data = dict(chunks[-1])
    data["message"] = dict(data.get("message") or {})
    data["message"]["content"] = "".join(
        (c.get("message") or {}).get("content") or "" for c in chunks
    )

# `or` guards against an explicit null for "message" or "content".
text = ((data.get("message") or {}).get("content") or "").strip()
assert text, "Empty content from moondream:1.8b"

print(f"moondream:1.8b chat OK on 11434 -> {text}")

In [None]:
# Install the langchain-ollama package.
# NOTE(review): none of the cells visible here import it - confirm it is still needed.
!pip install langchain-ollama

In [None]:
# Caption the sample images with moondream:1.8b through the Ollama chat API.
import base64, requests, os
from IPython.display import Markdown, display, Image as IPyImage

BASE = "http://127.0.0.1:11434"

def b64_image(path):
    """Read the file at *path* in binary mode and return it base64-encoded as a str."""
    with open(path, mode="rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

def caption_and_tags(image_path, temperature=0.1):
    """Ask moondream:1.8b for a literal caption of one image.

    Posts a non-streaming chat request to ``{BASE}/api/chat`` containing a
    system prompt, a user prompt, and the image as base64. Despite the
    function name, no separate tag list is requested: the prompt asks for a
    ``Caption`` line and a ``Notes`` line.

    Args:
        image_path: Path of the image file to describe.
        temperature: Sampling temperature sent in the request options.

    Returns:
        The stripped ``message.content`` string of the response, or ``""``
        when the response carries no such field.

    Raises:
        An HTTP error from ``raise_for_status()`` when the server replies
        with an error status.
    """
    # The caption is deliberately literal: a later model is expected to polish it.
    system_parts = (
        "You create literal English captions for a wedding-planning vendor catalog.\n",
        "Your text feeds a second model that adds flourish, so stay factual and grounded.\n",
        "Output exactly two lines:\n",
        "Line 1 - Caption: One concise sentence (<=220 characters) naming only what is clearly visible.\n",
        "Line 2 - Notes: Key visual evidence or uncertainties; write 'Notes: none' if everything is certain.\n",
        "Rules:\n",
        "- Mention people, props, colors, and actions only when obvious.\n",
        "- No marketing language, figurative wording, or invented brands/locations.\n",
        "- Do not guess unreadable text; say 'unreadable text' if lettering is unclear.\n",
        "- If you truly cannot tell what the scene shows, output 'Caption: Can't describe image accurately' and explain why in Notes.\n",
        "- Always respond in English.",
    )
    user_parts = (
        "Provide the literal catalog-ready description for this image so another model can restyle it. ",
        "Only describe what you can verify.",
    )

    system_message = {"role": "system", "content": "".join(system_parts)}
    user_message = {
        "role": "user",
        "content": "".join(user_parts),
        "images": [b64_image(image_path)],
    }
    request_body = {
        "model": "moondream:1.8b",
        "messages": [system_message, user_message],
        "stream": False,
        "options": {"temperature": temperature},
    }

    response = requests.post(f"{BASE}/api/chat", json=request_body, timeout=600)
    response.raise_for_status()
    message = response.json().get("message", {})
    return message.get("content", "").strip()

# Show each sample image followed by its generated caption; a missing file
# aborts the run with the current working directory in the error message.
for img_path in ("sample-1.jpeg", "sample-2.jpeg", "sample-3.jpeg"):
    if os.path.exists(img_path):
        display(IPyImage(filename=img_path))
        out = caption_and_tags(img_path)
        # Two trailing spaces before each newline force a Markdown line break.
        display(Markdown("  \n".join(out.split("\n"))))
    else:
        raise FileNotFoundError(f"Image not found: {img_path}. CWD: {os.getcwd()}")
