In [None]:
# Install the third-party packages this notebook relies on. `%pip` (rather than
# `!pip`) targets the running kernel's environment; `-q` keeps pip's output quiet.
# NOTE(review): versions are unpinned — consider pinning them for reproducible runs.
%pip install -q sentence-transformers transformers accelerate Pillow


In [None]:
import os
import json
from sentence_transformers import SentenceTransformer, util
from transformers import AutoProcessor, LlavaForConditionalGeneration
from PIL import Image
import torch
import requests
import base64
# Load the template metadata. Later cells read the "id", "description" and
# "tags" keys of every entry in this list.
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's locale-dependent default encoding.
with open("templates.json", "r", encoding="utf-8") as f:
    templates = json.load(f)


In [None]:
# Sentence-embedding model used to compare prompts with template descriptions.
st_model = SentenceTransformer("all-MiniLM-L6-v2")

# One searchable text per template: its description followed by its
# space-separated tags.
descs = [
    entry["description"] + " " + " ".join(entry["tags"])
    for entry in templates
]

# Encode every template text once up front; select_template() compares each
# incoming prompt against these embeddings.
desc_embeddings = st_model.encode(descs, convert_to_tensor=True)


In [None]:
def select_template(prompt):
    """Pick the template whose description best matches ``prompt``.

    The prompt is embedded with ``st_model`` and compared, via cosine
    similarity, against the precomputed ``desc_embeddings``; the entry of
    ``templates`` with the highest similarity wins.

    Args:
        prompt: Text describing the meme the user wants.

    Returns:
        A ``(path, template)`` tuple, where ``path`` is the template's ``"id"``
        joined onto the ``templates`` directory and ``template`` is the
        matching metadata entry.
    """
    prompt_embedding = st_model.encode(prompt, convert_to_tensor=True)
    # cos_sim returns one row per query; there is a single query, so take row 0.
    similarity_row = util.cos_sim(prompt_embedding, desc_embeddings)[0]
    best_template = templates[similarity_row.argmax().item()]
    template_path = os.path.join("templates", best_template["id"])
    return template_path, best_template


In [None]:
def generate_caption(image_path, prompt, model="llava:7b",
                     url="http://localhost:11434/api/chat", timeout=300):
    """Ask a local multimodal chat model to caption a meme template image.

    The image is read from disk, base64-encoded and sent together with the
    user's prompt to an Ollama-style ``/api/chat`` endpoint in a single,
    non-streaming request.

    Args:
        image_path: Path of the template image to send to the model.
        prompt: The user's meme idea; sent as the user message.
        model: Name of the model to query.
        url: Chat endpoint to POST to.
        timeout: Seconds to wait for the server before giving up, so a stalled
            server cannot hang the notebook indefinitely.

    Returns:
        The model's reply text with surrounding whitespace stripped.

    Raises:
        OSError: If the image file cannot be read.
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failures or timeouts.
    """
    with open(image_path, "rb") as img_file:
        image_data = base64.b64encode(img_file.read()).decode("utf-8")

    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": "generate a hillarious caption for this meme template"},
            # The chat endpoint reads images from the message they belong to.
            # A top-level "images" key (the /api/generate shape) is not part of
            # the chat request, so the model would never see the template.
            {"role": "user", "content": prompt, "images": [image_data]},
        ],
        "stream": False,
    }

    response = requests.post(url, json=payload, timeout=timeout)
    response.raise_for_status()
    data = response.json()
    return data["message"]["content"].strip()


In [None]:
# Interactive driver: read a prompt, pick the best-matching template, then
# ask the model for a caption.
prompt = input("Enter your meme prompt: ")
# select_template returns the image path and the template's metadata entry.
img_path, tmpl = select_template(prompt)
print("\n Using template:", tmpl["id"])
print(" Tags:", ", ".join(tmpl["tags"]), "\n")

# Sends the chosen template image and the prompt to the local model over HTTP.
caption = generate_caption(img_path, prompt)
print(" Generated Caption:", caption)
