In [132]:
# Enable IPython's autoreload extension so edits to imported project modules are picked up live.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [133]:
from sherpa_ai.memory import Belief
from states import get_actions, add_state_machine
from utils import load_processed_dataset
import random
from langchain_openai import ChatOpenAI
from sherpa_ai.agents.qa_agent import QAAgent
from sherpa_ai.events import Event, EventType
from clevr_policy import ReactPolicy
from tqdm.notebook import tqdm
import pandas as pd


In [134]:
def process_scene(scene):
    """Strip bookkeeping and geometry fields from a scene dict, in place.

    Removes ``image_index``, ``image_filename``, ``split`` and ``directions``
    from the scene itself, and ``rotation``, ``pixel_coords`` and ``3d_coords``
    from every entry of ``scene["objects"]``. Keys that are already absent are
    ignored, so calling this repeatedly on the same scene is harmless.

    Args:
        scene: Mutable mapping with an ``"objects"`` list of mutable mappings.

    Returns:
        None. The scene is modified in place.

    Raises:
        KeyError: If ``scene`` has no ``"objects"`` entry.
    """
    scene.pop("image_index", None)

    # Per-object fields that are dropped from every object.
    for entry in scene["objects"]:
        for field in ("rotation", "pixel_coords", "3d_coords"):
            entry.pop(field, None)

    # Remaining scene-level fields are dropped after the objects are cleaned.
    for field in ("image_filename", "split", "directions"):
        scene.pop(field, None)


In [135]:
# Chat model shared by the policy and agent cells; the name is also embedded in a results file name.
model_name = "gpt-4o-mini"
llm = ChatOpenAI(model=model_name)
# Scenes and questions come from the project helper in `utils`.
scenes, questions = load_processed_dataset()

In [136]:
# Strip the first scene in place (process_scene mutates its argument), then display what is left.
process_scene(scenes[0])
scenes[0]

{'objects': [{'color': 'brown',
   'size': 'large',
   'shape': 'cylinder',
   'material': 'rubber'},
  {'color': 'gray', 'size': 'large', 'shape': 'cube', 'material': 'rubber'},
  {'color': 'green',
   'size': 'small',
   'shape': 'cylinder',
   'material': 'rubber'},
  {'color': 'purple', 'size': 'large', 'shape': 'sphere', 'material': 'metal'},
  {'color': 'gray', 'size': 'small', 'shape': 'cube', 'material': 'metal'}],
 'relationships': {'right': [[1, 2, 3, 4], [3, 4], [1, 3, 4], [4], []],
  'behind': [[2, 3], [0, 2, 3, 4], [3], [], [0, 2, 3]],
  'front': [[1, 4], [], [0, 1, 4], [0, 1, 2, 4], [1]],
  'left': [[], [0, 2], [0], [0, 1, 2], [0, 1, 2, 3]]}}

Next: run the ReAct state machine on a CLEVR example.

In [140]:
# Build a fresh belief and its action map in this cell so it runs on a clean kernel
# instead of relying on `belief` / `action_map` left over from the evaluation loop.
belief = Belief()
action_map = get_actions(belief, llm)
# Last expression: the call's return value is what the cell displays.
add_state_machine(belief, action_map)

---
State Machine
---
stateDiagram-v2
  direction LR
  classDef s_default fill:white,color:black
  classDef s_inactive fill:white,color:black
  classDef s_parallel color:black,fill:white
  classDef s_active color:red,fill:darksalmon
  classDef s_previous color:blue,fill:azure
  
  state "Start" as Start
  Class Start s_active
  state "Counting" as Counting
  Class Counting s_default
  state "Querying" as Querying
  Class Querying s_default
  state "Judging" as Judging
  Class Judging s_default
  state "Finish" as Finish
  Class Finish s_default
  
  Start --> Counting: count_question
  Start --> Querying: query_question
  Start --> Judging: judge_question
  Judging --> Judging: get_related_objects | filter_with_attribute | get_same_objects
  Judging --> Finish: answer_judging
  Counting --> Counting: get_related_objects | filter_with_attribute | get_same_objects
  Counting --> Finish: answer_counting
  Querying --> Finish: answer_querying
  [*] --> Start


<sherpa_ai.memory.belief.Belief at 0x23a8e8d48b0>

In [138]:
# Textual transition table, one "Source --> Target: trigger" entry per line, covering
# the Start, Filtering, Querying, Relating, Checking and Finish states.
# NOTE(review): no visible cell reads STATE_MACHINE — confirm whether it is still used.
STATE_MACHINE = """
Start --> Filtering: start
Filtering --> Filtering: filter_with_attribute
Filtering --> Querying: continue
Filtering --> Finish: answer
Querying --> Relating: continue
Querying --> Querying: query_attribute
Querying --> Finish: answer
Relating --> Checking: continue
Relating --> Relating: get_related_objects
Relating --> Finish: answer
Checking --> Filtering: continue
Checking --> Checking: get_same_objects
Checking --> Finish: answer
"""

In [145]:
# Role prompt shared by the policy and the agent. It does not depend on the
# question, so it is built once instead of on every iteration. The text is part
# of the experiment and is reproduced unchanged (including its indentation).
AGENT_DESCRIPTION = """
    You are a question answering assistant helping users to find answers to their questions based on a specific scene.
    Each object in the scene contain the following properties: color, size, shape, material, and a unique identifier.
    The properties are from a fixed set of values:
    – Size: One of large or small.
    – Color: One of gray, red, blue, green, brown, purple, cyan, or yellow.
    – Shape: One of cube (block), sphere, or cylinder.
    – Material: One of rubber (matte) or metal (shinning).
    - Unique identifier: The index of the object in the scene, starting from 0.

    Objects in the scene also have the following relationships: left, right, front or behind.


    Use the available actions to interact with the scene and collect information to answer the question.
    When provide action arguments, ONLY use the values from the fixed set of values above.
    """

# Belief keys that may hold the final answer, highest priority first.
# NOTE(review): the state machine's counting transition is named `answer_counting`
# and the sibling keys follow the `<transition>_action` pattern, so
# "answer_counting_action" is checked in addition to the original
# "answer_count_action" — confirm which key the counting action actually writes.
ANSWER_KEYS = (
    "answer_counting_action",
    "answer_count_action",
    "answer_judging_action",
    "answer_querying_action",
    "answer_action",
)
NO_ANSWER = "No answer found."

# One (reasoning trace, predicted answer) tuple per question, in `questions` order.
results = []

for test_question in tqdm(questions):
    scene = scenes[test_question["image_index"]]
    # Mutates the shared scene in place; repeated calls on the same scene are harmless.
    process_scene(scene)

    # Every question gets its own belief so no state carries over between runs.
    belief = Belief()
    action_map = get_actions(belief, llm)
    add_state_machine(belief, action_map)

    belief.set("scene", scene)

    policy = ReactPolicy(
        role_description=AGENT_DESCRIPTION,
        output_instruction="Output the answer to the question.",
        llm=llm,
    )

    agent = QAAgent(
        llm=llm,
        belief=belief,
        description=AGENT_DESCRIPTION,
        num_runs=10,
        policy=policy,
    )

    belief.set_current_task(
        Event(
            EventType.task, "user", f"Answer the question: {test_question['question']}."
        )
    )
    agent.run()

    # Stringified internal events serve as the reasoning trace for this question.
    result = [str(event) for event in belief.internal_events]

    # Fold from lowest to highest priority: each lookup falls back to the value
    # found so far, which is equivalent to nesting the `belief.get` calls.
    answer = NO_ANSWER
    for key in reversed(ANSWER_KEYS):
        answer = belief.get(key, answer)

    results.append((result, answer))

  0%|          | 0/100 [00:00<?, ?it/s]

[32m2024-10-30 09:56:05.132[0m | [31m[1mERROR   [0m | [36msherpa_ai.agents.base[0m:[36mrun[0m:[36m104[0m - [31m[1m'Shape'[0m
[33m[1mTraceback (most recent call last):[0m

  File "[32mc:\Users\chenp\anaconda3\envs\sherpa\lib\[0m[32m[1mrunpy.py[0m", line [33m197[0m, in [35m_run_module_as_main[0m
    [35m[1mreturn[0m [1m_run_code[0m[1m([0m[1mcode[0m[1m,[0m [1mmain_globals[0m[1m,[0m [36m[1mNone[0m[1m,[0m
    [36m       │         │     └ [0m[36m[1m{'__name__': '__main__', '__doc__': 'Entry point for launching an IPython kernel.\n\nThis is separate from the ipykernel pack...[0m
    [36m       │         └ [0m[36m[1m<code object <module> at 0x0000023A922DE190, file "c:\Users\chenp\anaconda3\envs\sherpa\lib\site-packages\ipykernel_launcher....[0m
    [36m       └ [0m[36m[1m<function _run_code at 0x0000023A922DA9D0>[0m

  File "[32mc:\Users\chenp\anaconda3\envs\sherpa\lib\[0m[32m[1mrunpy.py[0m", line [33m87[0m, in [35m_run_code

In [125]:
# Show only the first few (trace, answer) pairs; the full list is too long to display.
results[:5]

[(['start: EventType.action - Action: start starts, Args: {}',
   'start: EventType.action_output - Action: start finishes, Observation: There are 4 object: [0, 1, 2, 3]',
   "filter_with_attribute: EventType.action - Action: filter_with_attribute starts, Args: {'object_ids': [0, 1, 2, 3], 'attribute': 'shape', 'value': 'sphere'}",
   'filter_with_attribute: EventType.action_output - Action: filter_with_attribute finishes, Observation: [3]',
   "query_attribute: EventType.action - Action: query_attribute starts, Args: {'object_id': 3, 'attribute': 'material'}",
   'query_attribute: EventType.action_output - Action: query_attribute finishes, Observation: rubber',
   "answer: EventType.action - Action: answer starts, Args: {'answer': 'rubber'}",
   'answer: EventType.action_output - Action: answer finishes, Observation: rubber'],
  'rubber')]

In [126]:
# Inspect whatever question the evaluation loop variable `test_question` currently holds.
test_question

{'split': 'val',
 'image_filename': 'CLEVR_val_007751.png',
 'answer': 'rubber',
 'question': 'What material is the ball to the far left?',
 'image_index': 7751}

In [146]:
# Ground-truth answers, index-aligned with `questions`.
real_answers = [entry["answer"] for entry in questions]

In [147]:
# Exact-match scoring: a prediction counts only when its string form equals the
# string form of the ground-truth answer. `zip` stops at the shorter sequence,
# so a partially filled `results` list is scored against the leading answers.
correct = [
    int(str(outcome[1]) == str(actual))
    for outcome, actual in zip(results, real_answers)
]

n_scored = len(correct)
n_correct = sum(correct)
# Guard the ratio so an empty `results` list reports NaN instead of raising ZeroDivisionError.
accuracy = n_correct / n_scored if n_scored else float("nan")
print(n_scored, n_correct, accuracy)

100 24 0.24


In [148]:
# Split each (trace, answer) pair into its own column and line them up with the
# ground-truth answers before writing the table to disk.
predicted_column = [outcome[1] for outcome in results]
reasoning_column = [outcome[0] for outcome in results]

df = pd.DataFrame(
    {
        "predicted": predicted_column,
        "reasoning": reasoning_column,
        "actual": real_answers,
    }
)

# The model name is part of the file name so runs with different models do not collide.
df.to_csv(f"routing_results_{model_name}_no_scene.csv", index=False)

In [60]:
# Spot-check a single question record by its position in `questions`.
questions[83]

{'split': 'val',
 'image_filename': 'CLEVR_val_007515.png',
 'answer': 'cylinder',
 'question': 'What shape do three large objects have in common?',
 'image_index': 7515}

In [9]:
# Each `results` entry is a (reasoning log, predicted answer) pair, so the second
# element is an answer, not a question. Pair every entry with the question it
# belongs to (results are appended in `questions` order) so the "question"
# column really holds the question text and the prediction gets its own column.
results_dict = [
    {"log": log, "question": asked["question"], "answer": answer}
    for (log, answer), asked in zip(results, questions)
]

# NOTE(review): the file name is hard-coded to "gpt-4o" while `model_name` is
# configured separately — confirm the name matches the model that produced `results`.
df = pd.DataFrame(results_dict)
df.to_csv("results_sm_gpt-4o.csv", index=False)

In [42]:
# Spot-check a single scene by its index in `scenes`.
scenes[2210]

{'objects': [{'color': 'purple',
   'size': 'large',
   'shape': 'cylinder',
   'material': 'metal'},
  {'color': 'green',
   'size': 'large',
   'shape': 'cylinder',
   'material': 'rubber'},
  {'color': 'gray', 'size': 'large', 'shape': 'cube', 'material': 'metal'},
  {'color': 'brown',
   'size': 'small',
   'shape': 'cylinder',
   'material': 'rubber'},
  {'color': 'green',
   'size': 'small',
   'shape': 'cylinder',
   'material': 'metal'}],
 'relationships': {'right': [[], [0, 4], [0, 1, 4], [0, 1, 2, 4], [0]],
  'behind': [[], [0, 2, 3], [0], [0, 2], [0, 1, 2, 3]],
  'front': [[1, 2, 3, 4], [4], [1, 3, 4], [1, 4], []],
  'left': [[1, 2, 3, 4], [2, 3], [3], [], [1, 2, 3]]}}

In [65]:
# Print only the question text of whatever record `test_question` currently holds.
print(test_question['question'])

Are there an equal number of shiny and matte objects?


In [66]:
# Display the full record behind `test_question` (the evaluation loop variable).
test_question

{'split': 'val',
 'image_filename': 'CLEVR_val_007329.png',
 'answer': 'yes',
 'question': 'Are there an equal number of shiny and matte objects?',
 'image_index': 7329}