In [9]:
import base64
import mimetypes
from typing import TypedDict

from langchain_ollama import ChatOllama
from langgraph.graph import StateGraph

# ----- STATE -----
class ImageState(TypedDict):
    """State carried through the image-description graph.

    Declared as a ``TypedDict`` so the annotations describe dictionary keys
    (which is how the state is accessed) rather than instance attributes.
    """

    # Path of the image file to describe; supplied by the caller.
    image_path: str
    # Text produced by the model; filled in by ``analyze_image``.
    description: str


# ----- NODE -----
def analyze_image(state: ImageState) -> dict:
    """Describe the image at ``state["image_path"]`` using ``llava:13b``.

    The file is read from disk, base64-encoded into a data URL and sent to
    the model through ``ChatOllama`` together with a fixed text prompt.

    Args:
        state: Graph state; must contain ``"image_path"``.

    Returns:
        A new dict holding every key of ``state`` plus ``"description"``
        set to the content of the model's reply. ``state`` itself is not
        modified.

    Raises:
        KeyError: If ``state`` has no ``"image_path"`` entry.
        OSError: If the image file cannot be opened or read.
    """
    model = ChatOllama(model="llava:13b")
    image_path = state["image_path"]

    # Load the raw bytes and base64-encode them (base64 output is pure ASCII).
    with open(image_path, "rb") as img_file:
        img_b64 = base64.b64encode(img_file.read()).decode("ascii")

    # Label the payload with the type implied by the file extension instead
    # of always claiming JPEG; fall back to JPEG when the extension is
    # unknown or does not map to an image type.
    mime_type, _ = mimetypes.guess_type(image_path)
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"
    image_data_url = f"data:{mime_type};base64,{img_b64}"

    # One user message carrying both the text prompt and the image part.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this image in detail."},
                {"type": "image_url", "image_url": image_data_url},
            ],
        }
    ]

    response = model.invoke(messages)

    # Build a fresh mapping rather than mutating the caller's state in place.
    return {**state, "description": response.content}


# ----- GRAPH -----
# Single-node workflow: the same node is both the entry and the finish point.
_ANALYZE_NODE = "analyze"

graph = StateGraph(ImageState)
graph.add_node(_ANALYZE_NODE, analyze_image)
graph.set_entry_point(_ANALYZE_NODE)
graph.set_finish_point(_ANALYZE_NODE)

# Compile the graph definition into an invocable application.
app = graph.compile()

# ----- RUN -----
# Invoke the compiled graph on a single image and print the description
# stored in the resulting state.
_initial_state = {"image_path": "image_side1.jpg"}
result = app.invoke(_initial_state)
print(result["description"])


 The image displays a collection of mechanical components and fasteners, arranged in two columns against a plain background. Each column is labeled with the number of units provided next to each component's representation. Starting from the left, there are three cylindrical bolts, each indicated by "10 ×". Followed by four flat washers with "5 ×" written next to them. The second column shows two nuts (also labeled as "20 ×") and a single hex bolt with "6 ×" notation. Above these components is an image of what appears to be a mechanical assembly or part, which could possibly be the item that these fasteners are used for. The image is in black and white, and it has a schematic or technical drawing style. There are no texts other than the quantity indications.
