In [24]:
# Load IPython's autoreload extension so edits to imported modules are picked
# up without restarting the kernel.
%load_ext autoreload
# Mode 2: re-import all modules before each cell execution (see the IPython
# autoreload documentation for the exact semantics and exclusions).
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [25]:
from sherpa_ai.memory import Belief
from react_states import get_actions, add_state_machine
from utils import load_processed_dataset
import random
from langchain_openai import ChatOpenAI
from sherpa_ai.agents.qa_agent import QAAgent
from sherpa_ai.events import Event, EventType
from clevr_policy import ReactPolicy
from tqdm.notebook import tqdm
import pandas as pd

In [26]:
# Scene-level keys removed by process_scene.
_SCENE_METADATA_KEYS = ("image_index", "image_filename", "split", "directions")
# Per-object keys removed by process_scene.
_OBJECT_GEOMETRY_KEYS = ("rotation", "pixel_coords", "3d_coords")


def process_scene(scene):
    """Strip bookkeeping and geometry fields from a scene dict, in place.

    Removes the scene-level keys ``image_index``, ``image_filename``,
    ``split`` and ``directions``, and from every entry of ``scene["objects"]``
    the keys ``rotation``, ``pixel_coords`` and ``3d_coords``. All other keys
    are left untouched.

    Keys that are already absent are skipped, so calling this repeatedly on
    the same scene is a no-op after the first call.

    Args:
        scene: Mutable mapping with an ``"objects"`` entry that is an
            iterable of mutable mappings.

    Returns:
        None. The scene is modified in place.

    Raises:
        KeyError: If ``scene`` has no ``"objects"`` key.
    """
    for key in _SCENE_METADATA_KEYS:
        # pop with a default replaces the "if key in d: d.pop(key)" dance.
        scene.pop(key, None)

    for obj in scene["objects"]:
        for key in _OBJECT_GEOMETRY_KEYS:
            obj.pop(key, None)


In [27]:
# Chat model shared by the policy and the agent in the evaluation loop; the
# name is also embedded in the results CSV file name.
model_name = "gpt-4o-mini"
llm = ChatOpenAI(model=model_name)
# `scenes` is indexed by each question's "image_index"; each entry of
# `questions` is a dict read via its "question", "answer" and "image_index" keys.
scenes, questions = load_processed_dataset()

In [28]:
# Sanity check: strip the first scene in place and display what is left.
process_scene(scenes[0])
scenes[0]

{'objects': [{'color': 'brown',
   'size': 'large',
   'shape': 'cylinder',
   'material': 'rubber'},
  {'color': 'gray', 'size': 'large', 'shape': 'cube', 'material': 'rubber'},
  {'color': 'green',
   'size': 'small',
   'shape': 'cylinder',
   'material': 'rubber'},
  {'color': 'purple', 'size': 'large', 'shape': 'sphere', 'material': 'metal'},
  {'color': 'gray', 'size': 'small', 'shape': 'cube', 'material': 'metal'}],
 'relationships': {'right': [[1, 2, 3, 4], [3, 4], [1, 3, 4], [4], []],
  'behind': [[2, 3], [0, 2, 3, 4], [3], [], [0, 2, 3]],
  'front': [[1, 4], [], [0, 1, 4], [0, 1, 2, 4], [1]],
  'left': [[], [0, 2], [0], [0, 1, 2], [0, 1, 2, 3]]}}

Next: run the ReAct state machine on the CLEVR examples.

In [29]:
# Role description handed to both the policy and the agent. It does not depend
# on the question, so it is built once instead of on every loop iteration, and
# it is a plain string because it has no placeholders to interpolate.
# NOTE: the four-space indent inside the literal is intentional -- it keeps the
# prompt byte-identical to the one used for previously recorded runs.
AGENT_DESCRIPTION = """
    You are a question answering assistant helping users to find answers to their questions based on a specific scene.
    Each object in the scene contain the following properties: color, size, shape, material, and a unique identifier.
    The properties are from a fixed set of values:
    – Size: One of large or small.
    – Color: One of gray, red, blue, green, brown, purple, cyan, or yellow.
    – Shape: One of cube (block), sphere, or cylinder.
    – Material: One of rubber (matte) or metal (shinning).
    - Unique identifier: The index of the object in the scene, starting from 0.

    Objects in the scene also have the following relationships: left, right, front or behind.

    Use the available actions to interact with the scene and collect information to answer the question.
    When provide action arguments, ONLY use the values from the fixed set of values above.
    """

# One (event log, answer) tuple per question, in the same order as `questions`.
results = []

for test_question in tqdm(questions):
    # Several questions can point at the same scene; process_scene skips keys
    # that are already gone, so repeating it on a shared scene is harmless.
    scene = scenes[test_question["image_index"]]
    process_scene(scene)

    # A new Belief per question, so the events collected below come from this
    # question's run only.
    belief = Belief()
    action_map = get_actions(belief)
    add_state_machine(belief, action_map)

    belief.set("scene", scene)

    policy = ReactPolicy(
        role_description=AGENT_DESCRIPTION,
        output_instruction="Output the answer to the question.",
        llm=llm,
    )

    agent = QAAgent(
        llm=llm,
        belief=belief,
        description=AGENT_DESCRIPTION,
        num_runs=10,  # presumably the cap on agent steps per question -- TODO confirm
        policy=policy,
    )

    belief.set_current_task(
        Event(
            EventType.task, "user", f"Answer the question: {test_question['question']}."
        )
    )
    agent.run()

    # Keep the stringified event trace next to the answer for later inspection.
    result = [str(event) for event in belief.internal_events]
    # Falls back to the sentinel string when "answer_action" is not set
    # (presumably when the agent never reached the answer action).
    answer = belief.get("answer_action", "No answer found.")
    results.append((result, answer))

  0%|          | 0/100 [00:00<?, ?it/s]

[32m2024-10-30 09:34:58.705[0m | [31m[1mERROR   [0m | [36msherpa_ai.agents.base[0m:[36mrun[0m:[36m104[0m - [31m[1m'Material'[0m
[33m[1mTraceback (most recent call last):[0m

  File "[32mc:\Users\chenp\anaconda3\envs\sherpa\lib\[0m[32m[1mrunpy.py[0m", line [33m197[0m, in [35m_run_module_as_main[0m
    [35m[1mreturn[0m [1m_run_code[0m[1m([0m[1mcode[0m[1m,[0m [1mmain_globals[0m[1m,[0m [36m[1mNone[0m[1m,[0m
    [36m       │         │     └ [0m[36m[1m{'__name__': '__main__', '__doc__': 'Entry point for launching an IPython kernel.\n\nThis is separate from the ipykernel pack...[0m
    [36m       │         └ [0m[36m[1m<code object <module> at 0x000001DCBB05E190, file "c:\Users\chenp\anaconda3\envs\sherpa\lib\site-packages\ipykernel_launcher....[0m
    [36m       └ [0m[36m[1m<function _run_code at 0x000001DCBB0599D0>[0m

  File "[32mc:\Users\chenp\anaconda3\envs\sherpa\lib\[0m[32m[1mrunpy.py[0m", line [33m87[0m, in [35m_run_c

In [30]:
# Ground-truth answers, in the same order as `questions`.
real_answers = [question["answer"] for question in questions]

In [31]:
# Exact-match scoring: a prediction counts as correct when its string form
# equals the string form of the ground-truth answer. zip() stops at the
# shorter input, so after a partial run only the completed questions are scored.
correct = [
    int(str(predicted) == str(expected))
    for (_, predicted), expected in zip(results, real_answers)
]
num_correct = sum(correct)
# Avoid ZeroDivisionError when nothing has been evaluated yet.
accuracy = num_correct / len(correct) if correct else float("nan")
print(len(correct), num_correct, accuracy)

100 60 0.6


In [32]:
# One row per question: the predicted answer, the stringified event trace
# that led to it, and the ground-truth answer.
df = pd.DataFrame(
    {
        "predicted": [answer for _, answer in results],
        "reasoning": [trace for trace, _ in results],
        "actual": real_answers,
    }
)

output_path = f"react_results_{model_name}_no_scene.csv"
df.to_csv(output_path, index=False)

In [6]:
# Each entry of `results` is an (event log, predicted answer) pair, so the
# second field is labelled "answer" rather than "question".
results_dict = [{"log": log, "answer": answer} for log, answer in results]

df = pd.DataFrame(results_dict)
# Derive the file name from the configured model so a run with one model does
# not get saved under (or overwrite) another model's file.
df.to_csv(f"results_react_{model_name}.csv", index=False)

['count_all_objects: EventType.action - Action: count_all_objects starts, Args: {}',
 'count_all_objects: EventType.action_output - Action: count_all_objects finishes, Observation: 10',
 "filter_with_attribute: EventType.action - Action: filter_with_attribute starts, Args: {'object_ids': [], 'attribute': 'material', 'value': 'rubber'}",
 'filter_with_attribute: EventType.action_output - Action: filter_with_attribute finishes, Observation: [3, 5, 6, 7, 9]',
 "filter_with_attribute: EventType.action - Action: filter_with_attribute starts, Args: {'object_ids': [], 'attribute': 'material', 'value': 'metal'}",
 'filter_with_attribute: EventType.action_output - Action: filter_with_attribute finishes, Observation: [0, 1, 2, 4, 8]',
 "answer: EventType.action - Action: answer starts, Args: {'answer': 'No, there are more matte objects than shiny objects.'}",
 'answer: EventType.action_output - Action: answer finishes, Observation: No, there are more matte objects than shiny objects.']

In [None]:
# Debugging aid: `test_question` is the loop variable left over from the
# evaluation loop, i.e. the last question that was processed.
print(test_question['question'])

how many cylinders are there that are in front of a green cube?


 Input: {}...```[0m
  warn_deprecated(
 Input: {'object_ids': [], 'attribute': 'shape', 'value': 'cylinder'}...```[0m
 Input: {'object_id': 8, 'relation': 'front'}...```[0m
 Input: {'object_ids': [0, 5, 6, 8, 9], 'attribute': 'color', 'value': 'green'}...```[0m
 Input: {}...```[0m
 Input: {}...```[0m
 Input: {'object_id': 8, 'relation': 'front'}...```[0m
 Input: {}...```[0m
 Input: {'answer': '2'}...```[0m


PolicyOutput(action={
    "name": "answer",
    "args": {
        "answer": "Answer to the question, type: str"
    },
    "usage": "Output answer to the question Transit the state from Reasoning to Finish"
}, args={'answer': '2'})

In [None]:
# Debugging aid: show the full record (including the ground-truth "answer")
# of the last question processed by the evaluation loop.
test_question

{'split': 'val',
 'image_filename': 'CLEVR_val_003661.png',
 'answer': '0',
 'question': 'how many cylinders are there that are in front of a green cube?',
 'image_index': 3661}