### LLM Imports

In [None]:
from langchain_openai import ChatOpenAI

from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph
from langgraph.graph.message import add_messages

from langchain_core.messages import (
    HumanMessage, 
    AIMessage, 
    SystemMessage,
    BaseMessage,
    trim_messages
)

from langchain_core.prompts import (
    ChatPromptTemplate, 
    MessagesPlaceholder,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate
)

from typing import Sequence

from typing_extensions import Annotated, TypedDict

import base64
import os
import re
from mimetypes import guess_type

In [None]:
# Placeholder for the OpenAI API key; fill it in before running the notebook.
# NOTE(review): this assignment replaces any OPENAI_API_KEY already present in
# the process environment with the (empty) literal below.
os.environ["OPENAI_API_KEY"] = ""

In [None]:
# Chat model shared by the whole notebook; call_model() sends the rendered
# prompt (text plus the image data URL) to it.
model = ChatOpenAI(model="gpt-4o")

In [None]:
def local_image_to_data_url(image_path):
    """Encode a local image file as a base64 ``data:`` URL.

    The MIME type is guessed from the file name; when it cannot be guessed,
    ``image/png`` is used. The file is read in binary mode and its whole
    content is base64-encoded into the returned string.
    """
    guessed_type, _encoding = guess_type(image_path)
    mime_type = 'image/png' if guessed_type is None else guessed_type

    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()

    payload = base64.b64encode(raw_bytes).decode('utf-8')
    return f"data:{mime_type};base64,{payload}"

In [None]:
# History trimmer applied to the stored messages before each model call.
# No message list is passed here, so the result is invoked later
# (call_model() uses trimmer.invoke(...)).
trimmer = trim_messages(
    # With token_counter=len the "token" budget is measured by calling len()
    # on the message list, i.e. this is a cap of 5 messages, not 5 LLM tokens.
    max_tokens=5,
    # Keep the most recent messages when the cap is exceeded.
    strategy="last",
    token_counter=len,
    include_system=True,
    allow_partial=False,
    # start_on="human",
)

In [None]:
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder, HumanMessagePromptTemplate

# Few-shot example scenarios of varying history length, including edge cases.
# Each entry has:
#   "messages"          - earlier instructions, each ending in a command tag
#   "image_input"       - a textual description standing in for the camera image
#   "expected_response" - the reasoning plus the next command tag
# Command tags follow the pattern used throughout the notebook: [t<angle>] for
# a turn (positive = left, negative = right) and [f<seconds>] for moving forward.
examples = [
    {
        "messages": [
            "The JetBot was facing a wall. [t30]",
            "The JetBot turned left again to clear the same wall. [t30]",
            "The JetBot moved forward to explore the area. [f3]",
            "The JetBot encountered a small rock and turned slightly to the right. [t-15]"
        ],
        "image_input": "The dog is slightly visible through the smoke on the left.",
        "expected_response": "The dog is slightly visible through the smoke on the left, so the JetBot will turn left to align with it. [t30]"
    },
    {
        "messages": [
            "The JetBot is in a corner and turned right to avoid it. [t-45]",
            "The JetBot moved forward for 5 seconds to explore further. [f5]",
            "The JetBot detected a low-hanging obstacle and turned left. [t30]",
            "The JetBot turned slightly right to readjust its path. [t-15]",
            "The JetBot encountered a thick patch of smoke and paused to reassess. [f0.5]"
        ],
        "image_input": "No objects detected, but a clearing is visible ahead.",
        "expected_response": "No objects detected, but a clearing is visible ahead, so the JetBot will move forward cautiously. [f3]"
    },
    {
        "messages": [
            "The JetBot detected an object directly ahead and approached it. [f4]",
            "The JetBot encountered a wall and turned right. [t-90]",
            "The JetBot moved forward to reposition. [f2]",
            "The JetBot detected a path leading left and turned to follow it. [t30]",
            "The JetBot adjusted slightly to center itself in the path. [t-10]",
            "The JetBot paused briefly to analyze the smoky environment. [f0.5]"
        ],
        "image_input": "An object is directly in front, slightly obscured by smoke.",
        "expected_response": "The object is directly in front, slightly obscured by smoke, so the JetBot will move forward to approach it. [f3.5]"
    },
    {
        "messages": [
            "The JetBot moved forward into the smoke to explore. [f4]",
            "The JetBot encountered a wall and turned sharply left. [t90]",
            "The JetBot adjusted to the right to avoid an unseen obstacle. [t-20]",
            "The JetBot moved forward cautiously. [f2]",
            "The JetBot turned left to realign after hitting another corner. [t30]",
            "The JetBot detected an opening and moved forward. [f3.5]",
            "The JetBot detected the object on the right and began to approach. [t-30]"
        ],
        "image_input": "The object is now directly ahead in a clearing.",
        "expected_response": "The object is now directly ahead in a clearing, so the JetBot will move forward to reach it. [f5]"
    },
    {
        "messages": [
            "The JetBot was facing a wall and turned left slightly. [t15]",
            "The JetBot was still slightly facing the wall and turned left again to clear it. [t15]",
            "The JetBot moved forward to ensure it avoided the wall entirely. [f3]",
            "The JetBot encountered another corner and turned sharply left. [t90]",
            "The JetBot moved forward through the smoke. [f4]",
            "The JetBot detected an object directly in front. [f3]"
        ],
        "image_input": "The object is slightly to the left of the screen.",
        "expected_response": "The object is slightly to the left of the screen, so the JetBot will turn left to align with it. [t30]"
    }
]

# Render every example as a numbered plain-text section ("Example N:",
# "Past Messages:", "Image Input:", "Expected Response:") and join the
# sections with newlines. The result is embedded verbatim in the system prompt.
example_text = "\n".join(
    f"Example {i+1}:\n"
    f"Past Messages:\n" + "\n".join(f"  {msg}" for msg in example["messages"]) +
    f"\nImage Input:\n  {example['image_input']}\nExpected Response:\n  {example['expected_response']}\n"
    for i, example in enumerate(examples)
)


# Three-part chat prompt: a system message (task description + few-shot
# examples), a user message carrying the instruction history, and a second
# human message carrying the image.
# Template variables filled in by call_model(): {messages}, {image_path},
# {detail_parameter}.
image_prompt_template = ChatPromptTemplate.from_messages(
    messages=[
        (
            "system",
            # This is an f-string, so {example_text} is substituted right here,
            # at definition time, and is not a template variable.
            # NOTE(review): any literal braces inside example_text would
            # presumably be read as template variables by ChatPromptTemplate.
            f"""You are a robot controller tasked with navigating a JetBot to a dog. 
            The JetBot receives image inputs and must reason about its environment and past actions to choose the best path forward. 
            The JetBot operates inside a tent and in a smoky environment, which may obscure visibility. 
            If the JetBot encounters obstacles (e.g., walls, corners, or other objects), turn to avoid them. 
            Always reason step-by-step and base your decisions on continuity from past actions.
            
            Here are examples of how you should reason and respond:
            {example_text}
            """
        ),
        # MessagesPlaceholder(variable_name="messages"),
        (
            "user",
            # Plain (non-f) string: {messages} stays a template variable and
            # receives the flattened history string.
            """Now here are the actual instructions and reasoning you have given the Jetbot so far: {messages}
            First, summarize the instructions you have given so far sequentially. (Start with Instruction 1, If there are none, don't.). 
            Secondly, describe the following image. (If there is a dog, describe its location and determine the direction the JetBot should travel to approach the dog. 
            If no dog is detected, analyze the environment and choose a direction to continue exploring.)
            Finally, use prior instructions and outcomes to reason about the next move. Strictly format your response as:
            '<reasoning>. [<command><value>]'"""
        ),
        # MessagesPlaceholder(variable_name="messages"),
        # HumanMessagePromptTemplate.from_template(
        #     "Here is the image input: {image_url}"
        # ),
        # Image part: the URL (a data URL in this notebook) and the detail
        # level are both template variables.
        HumanMessagePromptTemplate.from_template(
            [{'image_url': {'url': '{image_path}', 'detail': '{detail_parameter}'}}]
        )
    ]
)

# The prompt template now includes extended examples, reasoning continuity, and adherence to strict formatting.

In [None]:

def message_to_string(trimmings):
    """Flatten a sequence of messages into a numbered instruction list.

    Args:
        trimmings: Iterable of objects exposing a ``content`` attribute
            (the trimmed chat history).

    Returns:
        A string that starts with a newline, has one
        ``Instruction Number <n>: <content>`` line per message (numbered
        from 1) and ends with a sentinel line marking the end of the list.
        For an empty history only the sentinel line is returned.
    """
    lines = [
        # The f-string also stringifies non-str content instead of raising
        # on concatenation.
        f"Instruction Number {number}: {message.content}"
        for number, message in enumerate(trimmings, start=1)
    ]
    # The sentinel goes on its own line so it is not glued onto the text of
    # the last instruction.
    lines.append("End of Instructions you have given so far.")
    return "\n" + "\n".join(lines)

In [None]:
class State(TypedDict):
    """Graph state passed to call_model() on every invocation."""

    # Conversation history; annotated with the add_messages reducer so values
    # returned by a node are merged into the stored list.
    messages: Annotated[Sequence[BaseMessage], add_messages]
    # Value substituted for {detail_parameter} in the image part of the prompt.
    detail_parameter: str
    # Value substituted for {image_path}; the main loop passes a data URL here.
    image_path: str
    


# Graph builder whose state follows the State schema.
workflow = StateGraph(state_schema=State)


def call_model(state: State):
    """Graph node: render the prompt from the current state and query the model.

    The stored messages are trimmed, flattened to a single string and fed to
    the prompt template together with the image URL and detail level. The
    rendered prompt is printed, sent to the model, and the model's reply is
    returned under the ``messages`` key.
    """
    history = trimmer.invoke(state["messages"])
    template_inputs = {
        "messages": message_to_string(history),
        "detail_parameter": state["detail_parameter"],
        "image_path": state["image_path"],
    }
    prompt = image_prompt_template.invoke(template_inputs)
    print(prompt)
    reply = model.invoke(prompt)
    return {"messages": [reply]}


# Single-node graph: START -> "model".
workflow.add_edge(START, "model")
workflow.add_node("model", call_model)

# Compile with an in-memory checkpointer; the main loop passes a thread_id in
# its config when invoking the app.
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

## Jetbot Imports

In [None]:
import traitlets
import ipywidgets.widgets as widgets
from IPython.display import display
from jetbot import Camera, bgr8_to_jpeg

# Camera handle configured for 224x224 frames.
camera = Camera.instance(width=224, height=224)

image = widgets.Image(format='jpeg', width=224, height=224)  # this width and height doesn't necessarily have to match the camera

# One-way link: camera.value is passed through bgr8_to_jpeg and written to
# image.value, so the widget (and save_snapshot(), which reads image.value)
# holds JPEG-encoded bytes.
camera_link = traitlets.dlink((camera, 'value'), (image, 'value'), transform=bgr8_to_jpeg)

display(image)

In [None]:
import os
import time
from uuid import uuid1

# Directory that save_free() writes camera snapshots into.
free_dir = 'images'

# Create the directory; an already-existing directory is reported, not an error.
try:
    os.makedirs(free_dir)
except FileExistsError:
    print('Directories not created because they already exist')

In [None]:
def save_snapshot(directory):
    """Write the current widget frame (``image.value``) to ``<directory>/view.png``.

    The file name is fixed, so every call overwrites the previous snapshot.
    """
    snapshot_path = os.path.join(directory, 'view.png')

    with open(snapshot_path, 'wb') as snapshot_file:
        snapshot_file.write(image.value)
        
def save_free():
    """Save a snapshot of the current frame into the ``free_dir`` directory."""
    # free_dir is only read here, so no ``global`` declaration is needed.
    save_snapshot(free_dir)
    
def turn(args):
    """Rotate the robot in place by roughly ``args`` degrees.

    Args:
        args: Angle as an integer or integer-like string. Positive values
            turn left; zero and negative values use the right-turn motor call.

    Returns:
        A status string: either a confirmation with the parsed angle or an
        "invalid argument" message when ``args`` is not an integer.

    NOTE(review): ``robot`` is not defined in the visible part of this file;
    it must exist as a module-level object before this function is called.
    """
    # Only the parsing is guarded, so errors raised by the motor calls are
    # not mistaken for a bad argument.
    try:
        angle = int(args)
    except (TypeError, ValueError):
        return "Invalid argument for turn. Please provide an integer."

    # Turn time scales linearly with the angle: one second per 90 degrees.
    # NOTE(review): presumably calibrated for motor speed 0.11 - confirm.
    duration = abs(angle) / 90
    try:
        if angle > 0:
            robot.left(0.11)
        else:
            robot.right(0.11)
        time.sleep(duration)
    finally:
        # Always stop the motors, even if the sleep is interrupted.
        robot.stop()
    time.sleep(0.1)
    return f"Turned {angle} degrees."

def move_forward(args):
    """Drive the robot forward for ``args`` seconds.

    Args:
        args: Duration in seconds as a number or numeric string. It must be
            finite and non-negative.

    Returns:
        A status string: either a confirmation with the parsed duration or an
        "invalid argument" message when ``args`` is not a usable duration.

    NOTE(review): ``robot`` is not defined in the visible part of this file;
    it must exist as a module-level object before this function is called.
    """
    invalid = "Invalid argument for move_forward. Please provide a number."
    try:
        duration = float(args)
    except (TypeError, ValueError):
        return invalid

    # Reject negative, NaN and infinite durations BEFORE starting the motors.
    # time.sleep() raises on such values, which previously happened after
    # robot.forward() and left the robot driving. NaN fails both comparisons.
    if not (0 <= duration < float("inf")):
        return invalid

    try:
        robot.forward(0.15)
        time.sleep(duration)
    finally:
        # Always stop the motors, even if the sleep is interrupted.
        robot.stop()
    time.sleep(0.1)

    return f"Moved forward for {duration} seconds."


In [None]:
def parse_chatbot_response(response):
    """Split a model reply into its reasoning text and final movement command.

    A command tag looks like ``[f<number>]`` or ``[t<number>]``. The prompt
    asks the model to summarise earlier instructions before giving the new
    one, so a reply may contain several tags; the LAST tag is the command
    to execute.

    Args:
        response: The model's reply text.

    Returns:
        A ``(reasoning, command, value)`` tuple. ``reasoning`` is the text
        before the final tag, stripped (or ``"No reasoning provided."`` when
        that text is empty or no tag exists). ``command`` is ``"f"`` or
        ``"t"`` and ``value`` is the numeric text of the tag; both are
        ``None`` when no tag is found.
    """
    # Value: optional minus sign, then digits with an optional fractional
    # part (also accepts forms like ".5" and "5.").
    tag_pattern = r"\[(f|t)(-?(?:\d+(?:\.\d*)?|\.\d+))\]"

    last_tag = None
    for last_tag in re.finditer(tag_pattern, response):
        pass  # keep iterating; only the final match matters

    if last_tag is None:
        return "No reasoning provided.", None, None

    reasoning = response[:last_tag.start()].strip() or "No reasoning provided."
    return reasoning, last_tag.group(1), last_tag.group(2)

In [None]:
# Run configuration passed to app.invoke(); the thread_id selects the
# conversation thread used by the checkpointer.
config = {"configurable": {"thread_id": "abc456"}}
# Value substituted for {detail_parameter} in the image part of the prompt.
detail_parameter = 'high'
# Image file that the main loop encodes and sends to the model.
# NOTE(review): save_free() writes the live camera frame to images/view.png,
# which is a different file from this one - confirm which image is intended.
path = "images/imageofdog.png"

In [None]:
# Control loop: capture a frame, ask the model for the next move, execute it.
# Runs until the kernel is interrupted.
while True:
    # Write the current camera frame to disk.
    save_free()

    # NOTE(review): `path` is the file that gets encoded and sent; check that
    # it points at the snapshot written by save_free().
    url = local_image_to_data_url(path)

    output = app.invoke(
        {"detail_parameter": detail_parameter, "image_path": url},
        config,
    )

    # invoke() returns the graph state as a dict, so the message list is read
    # by key; the last entry is the model's newest reply.
    reasoning, command, value = parse_chatbot_response(output["messages"][-1].content)
    if command == "f":
        move_forward(value)
    elif command == "t":
        turn(value)
    # When no command is parsed, nothing is executed and the loop asks again.