In [72]:
# pip install langchain langchain-openai python-dotenv

from langchain_openai import ChatOpenAI
from langchain.tools import tool
from langchain_core.messages import (
    HumanMessage,
    SystemMessage,
    AIMessage,
    ToolMessage,
)

from dotenv import load_dotenv

load_dotenv()



True

In [73]:
# NOTE: the docstring below doubles as the tool description that LangChain's
# @tool decorator exposes to the model, so its wording is left untouched.
@tool
def capture_camera_image() -> dict:
    """
    Capture a live camera image frame.
    (Dummy tool: returns a hardcoded public image URL)
    """
    # Placeholder payload; no camera is actually accessed.
    frame = dict(status="ok", image_url="https://placekitten.com/800/600")
    return frame

# Every tool the model is allowed to request.
tools = [capture_camera_image]

# ---------------------------
# 2. Bind tools to the LLM
# ---------------------------
# temperature=0 is passed to the model constructor; bind_tools attaches the
# tool list so the model can answer with a tool call.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0).bind_tools(tools)

# ---------------------------
# 3. Initial conversation
# ---------------------------
# System instruction first, then the user's request.
system_prompt = SystemMessage(content="You can use tools when needed to analyze images.")
user_prompt = HumanMessage(content="Look at the live camera image and describe it.")
messages = [system_prompt, user_prompt]

# ---------------------------
# 4. First LLM call (LLM decides to call tool)
# ---------------------------
ai_msg = llm.invoke(messages)

# Header and message repr on separate lines, same as two consecutive prints.
print("\n=== AI Initial Message ===", ai_msg, sep="\n")



=== AI Initial Message ===
content='' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 11, 'prompt_tokens': 71, 'total_tokens': 82, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_a460d7e2b7', 'id': 'chatcmpl-CkFPcQZZBew2kd2sAriOxs4JtG8sW', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None} id='lc_run--951fe239-cf4f-46f8-a844-ea8fefc7f222-0' tool_calls=[{'name': 'capture_camera_image', 'args': {}, 'id': 'call_iH3RI72N3cP4WOx4KcOSrfqI', 'type': 'tool_call'}] usage_metadata={'input_tokens': 71, 'output_tokens': 11, 'total_tokens': 82, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}


In [74]:

# ---------------------------
# 6. Manually craft tool output
# ---------------------------

# The first LLM call is expected to have requested a tool. Fail with a clear
# message instead of a bare IndexError if it answered in plain text instead.
if not ai_msg.tool_calls:
    raise RuntimeError(
        "Expected the model to request a tool call, but ai_msg.tool_calls is empty."
    )

tool_call = ai_msg.tool_calls[0]
tool_name = tool_call["name"]
tool_call_id = tool_call["id"]

# Hand-written stand-in for the tool result; the tool itself is not executed here.
manual_tool_output = {
    "status": "ok",
    "image_url": "https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcQV7sONOx4fl1xq9CbdWUmcTamWwzrPMzqKhZOGHh-V0zHpn0Ly"
}

# Text-only reply linked to the pending tool call through its id.
tool_msg = ToolMessage(
    content='image provided below',
    tool_call_id=tool_call_id
)

# The image travels in a follow-up human message as an image_url content part.
# The URL is read from manual_tool_output so the two cannot drift apart.
human_msg = HumanMessage(
    content=[
        {"type": "image_url", "image_url": {"url": manual_tool_output["image_url"]}}
    ]
)

# Display both crafted messages as the cell output.
tool_msg, human_msg

(ToolMessage(content='image provided below', tool_call_id='call_iH3RI72N3cP4WOx4KcOSrfqI'),
 HumanMessage(content=[{'type': 'image_url', 'image_url': {'url': 'https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcQV7sONOx4fl1xq9CbdWUmcTamWwzrPMzqKhZOGHh-V0zHpn0Ly'}}], additional_kwargs={}, response_metadata={}))

In [75]:
# Appended in conversation order: the assistant's tool-call message, the tool
# reply that answers it, then the human message carrying the image.
# NOTE: this mutates `messages` in place, so re-running this cell appends again.
messages.extend([ai_msg, tool_msg, human_msg])

In [76]:



# Dump each message's class and content, leaving two blank lines between entries.
for msg in messages:
    print(type(msg), msg.content, end="\n\n\n")

<class 'langchain_core.messages.system.SystemMessage'> You can use tools when needed to analyze images.


<class 'langchain_core.messages.human.HumanMessage'> Look at the live camera image and describe it.


<class 'langchain_core.messages.ai.AIMessage'> 


<class 'langchain_core.messages.tool.ToolMessage'> image provided below


<class 'langchain_core.messages.human.HumanMessage'> [{'type': 'image_url', 'image_url': {'url': 'https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcQV7sONOx4fl1xq9CbdWUmcTamWwzrPMzqKhZOGHh-V0zHpn0Ly'}}]




In [77]:

# ---------------------------
# 7. Second LLM call (model now analyzes image)
# ---------------------------
# Send the full history, which now includes the tool reply and the image message.
final_msg = llm.invoke(messages)

# Header followed by the text of the model's answer.
print("\n=== Final AI Response ===", final_msg.content, sep="\n")


=== Final AI Response ===
The image features three young men standing together, each displaying a humorous expression. They are holding a large sign that reads "3 IDIOTS," which is likely a reference to a popular film. The men are dressed casually, with two of them wearing shorts and the third in pants, creating a playful and comedic vibe. The background is simple, focusing attention on the characters and the sign.
