In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits to them are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from sherpa_ai.memory import Belief
from states import get_actions, add_state_machine
from utils import load_processed_dataset
import random
from langchain_openai import ChatOpenAI
from sherpa_ai.agents.qa_agent import QAAgent
from sherpa_ai.events import Event, EventType
from policy import ReactPolicy
from tqdm.notebook import tqdm
import pandas as pd
from datasets import load_dataset


[32m2024-11-27 09:50:48.502[0m | [1mINFO    [0m | [36msherpa_ai.config[0m:[36m<module>[0m:[36m134[0m - [1mConfig: OpenAI environment variables are set[0m
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\chenp\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [3]:
# Fetch the CLEVR subset from the Hugging Face Hub (token=True asks the library
# to use the locally stored Hub credential) and keep only its "train" split.
clevr_subset = load_dataset("Dogdays/clevr_subset", token=True)
dataset = clevr_subset["train"]

In [4]:
# Chat model shared by the policy, the actions and the agent below.
# model_name is also embedded in the results file name written later.
model_name = "gpt-4o-mini"
llm = ChatOpenAI(
    model=model_name,
    temperature=0.01,
)

Next: run the ReAct state machine on the CLEVR examples.

In [5]:
# Textual description of the agent's states and transitions, one
# "Source --> Target: trigger" entry per line.
# NOTE(review): no visible cell reads STATE_MACHINE; add_state_machine() is
# called without it, so this may be documentation only. Confirm against states.py.
STATE_MACHINE = """
Start --> Filtering: start
Filtering --> Filtering: filter_with_attribute
Filtering --> Querying: continue
Filtering --> Finish: answer
Querying --> Relating: continue
Querying --> Querying: query_attribute
Querying --> Finish: answer
Relating --> Checking: continue
Relating --> Relating: get_related_objects
Relating --> Finish: answer
Checking --> Filtering: continue
Checking --> Checking: get_same_objects
Checking --> Finish: answer
"""

In [7]:
# Build a throwaway belief and action map here so this cell runs on a fresh
# kernel; it previously relied on `belief` / `action_map` leaking out of the
# evaluation loop that appears further down the notebook.
belief = Belief()
action_map = get_actions(belief, llm)

# print_sm=True renders the state machine diagram; the call's return value is
# displayed as the cell output.
add_state_machine(belief, action_map, print_sm=True)

---
State Machine
---
stateDiagram-v2
  direction LR
  classDef s_default fill:white,color:black
  classDef s_inactive fill:white,color:black
  classDef s_parallel color:black,fill:white
  classDef s_active color:red,fill:darksalmon
  classDef s_previous color:blue,fill:azure
  
  state "Start" as Start
  Class Start s_active
  state "Exploring" as Exploring
  Class Exploring s_default
  state Exploring {
    [*] --> Exploring_Filtering
    state "Filtering" as Exploring_Filtering
    state "Checking" as Exploring_Checking
    state "Relating" as Exploring_Relating
    state "Querying" as Exploring_Querying
  }
  state "Finish" as Finish
  Class Finish s_default
  
  Start --> Exploring: start
  Exploring_Filtering --> Exploring_Filtering: filter_with_attribute
  Exploring_Filtering --> Exploring_Relating: other_options
  Exploring_Relating --> Exploring_Checking: other_options
  Exploring_Relating --> Exploring_Relating: get_related_objects
  Exploring_Checking --> Exploring_Filtering

<sherpa_ai.memory.belief.Belief at 0x256a9c1e340>

In [6]:
# Run the state-machine QA agent over the first NUM_SAMPLES dataset rows.
# For every sample, `results` receives a tuple of
#   (list of stringified internal events, answer payload).
results = []

# NOTE(review): the prompt text is kept byte-for-byte (including its typos) so
# that previously recorded results stay comparable.
AGENT_DESCRIPTION = """
You are a question answering assistant helping users to find answers to their questions based on a specific scene.
Each object in the scene contain the following properties: color, size, shape, material, and a unique identifier.
The properties are from a fixed set of values:
– Size: One of large or small.
– Color: One of gray, red, blue, green, brown, purple, cyan, or yellow.
– Shape: One of cube (block), sphere, or cylinder.
– Material: One of rubber (matte) or metal (shinning).
- Unique identifier: The index of the object in the scene, starting from 0.

{scene}

Objects in the scene also have the following relationships: left, right, front or behind.

Given the question, first identify ALL relevant objects in the scene using filter. Then identify their relations.

If answering the question requires and object that does not exist in the scene, give answer "no" if it is a boolean question, or "0" if it is count question.

When provide action arguments, ONLY use the values from the fixed set of values above.
"""
# The {scene} placeholder is filled with "Scene: <scene>" when use_scene is
# True and with an empty string otherwise.
idx = 0  # NOTE(review): not read by any visible cell
use_scene = True

NUM_SAMPLES = 100  # upper bound on the number of evaluated samples
NUM_RUNS = 10  # forwarded to QAAgent as num_runs

# Belief keys that may hold the final answer, highest priority first.
ANSWER_KEYS = (
    "answer_count_action",
    "answer_judging_action",
    "answer_querying_action",
    "answer_action",
)

# Never ask for more rows than the dataset holds, so a smaller dataset does not
# make select() fail on out-of-range indices.
num_samples = min(NUM_SAMPLES, len(dataset))

for sample in tqdm(dataset.select(range(num_samples))):
    scene = sample["scene"]
    test_question = sample["question"]

    # A new belief, action map and state machine are built for every sample.
    belief = Belief()
    action_map = get_actions(belief, llm)
    add_state_machine(belief, action_map, print_sm=False)

    belief.set("scene", scene)

    if use_scene:
        # Expose the scene to the agent both through the belief and the prompt.
        belief.set("agent_scene", scene)
        agent_description = AGENT_DESCRIPTION.format(scene=f"Scene: {scene}")
    else:
        agent_description = AGENT_DESCRIPTION.format(scene="")

    policy = ReactPolicy(
        role_description=agent_description,
        llm=llm,
    )

    agent = QAAgent(
        llm=llm,
        belief=belief,
        description=agent_description,
        num_runs=NUM_RUNS,
        policy=policy,
    )

    belief.set_current_task(
        Event(
            EventType.task, "user", f"{test_question}."
        )
    )
    agent.run()

    # If the run ended outside the "Finish" state, trigger the `answer`
    # transition explicitly.
    if belief.state_machine.state != "Finish":
        belief.state_machine.answer()

    result = [str(event) for event in belief.internal_events]

    # The fallback has the same {"answer": ...} shape that the downstream cells
    # index into; a bare string here made `answer["answer"]` raise TypeError.
    answer = {"answer": "No answer found."}
    # Walk the keys from lowest to highest priority, feeding each lookup the
    # previous result as its default. This is the same lookup order as nested
    # belief.get(...) calls (assumes get() returns the default for a missing key).
    for key in reversed(ANSWER_KEYS):
        answer = belief.get(key, answer)

    results.append((result, answer))

  0%|          | 0/1 [00:00<?, ?it/s]

  warn_deprecated(


In [36]:
# Ground-truth answers for exactly the samples that were evaluated.
# zip() stops at the shorter input, so the list has one entry per element of
# `results` even when the dataset holds more rows than were run; this keeps it
# usable as a DataFrame column next to the predictions.
real_answers = [sample["answer"] for sample, _ in zip(dataset, results)]

In [37]:
# Each results entry is (event_log, answer). The answer is normally a mapping
# with an "answer" key, but may be a plain string fallback, which must not be
# indexed with ["answer"].
predictions = [
    answer if isinstance(answer, str) else answer["answer"]
    for _, answer in results
]

# 1 for an exact string match with the ground truth, 0 otherwise.
correct = [
    1 if str(predicted) == str(actual) else 0
    for predicted, actual in zip(predictions, real_answers)
]

# Guard against an empty run so the summary does not raise ZeroDivisionError.
accuracy = sum(correct) / len(correct) if correct else 0.0
print(len(correct), sum(correct), accuracy)

100 85 0.85


In [38]:
# Tabulate predictions, reasoning traces and ground truth, then save them.
# A plain-string answer (the "no answer" fallback) is used as-is instead of
# being indexed with ["answer"], which would raise TypeError.
predicted_answers = [
    answer if isinstance(answer, str) else answer["answer"]
    for _, answer in results
]

df = pd.DataFrame({
    "predicted": predicted_answers,
    "reasoning": [events for events, _ in results],
    # Trim to the evaluated samples: columns must all have the same length.
    "actual": real_answers[: len(results)],
})

df.to_csv(f"state_machine_results_{model_name}_1.csv", index=False)

In [60]:
# Inspect a single question record.
# NOTE(review): `questions` is not defined in any visible cell; this relies on
# leftover kernel state and will fail on Restart & Run All.
questions[83]

{'split': 'val',
 'image_filename': 'CLEVR_val_007515.png',
 'answer': 'cylinder',
 'question': 'What shape do three large objects have in common?',
 'image_index': 7515}

In [9]:
# Flatten the (event_log, answer) tuples into one record per sample.
# NOTE(review): the "question" column actually stores the second tuple element,
# which the evaluation cell fills with the agent's answer, not the question.
results_dict = [
    {"log": event_log, "question": answer}
    for event_log, answer in results
]

# NOTE(review): the file name says gpt-4o while model_name above is
# "gpt-4o-mini"; confirm which run this export belongs to.
df = pd.DataFrame(results_dict)
df.to_csv("results_sm_gpt-4o.csv", index=False)

In [42]:
# Inspect one scene record (objects plus their spatial relationships).
# NOTE(review): `scenes` is not defined in any visible cell; this relies on
# leftover kernel state and will fail on Restart & Run All.
scenes[2210]

{'objects': [{'color': 'purple',
   'size': 'large',
   'shape': 'cylinder',
   'material': 'metal'},
  {'color': 'green',
   'size': 'large',
   'shape': 'cylinder',
   'material': 'rubber'},
  {'color': 'gray', 'size': 'large', 'shape': 'cube', 'material': 'metal'},
  {'color': 'brown',
   'size': 'small',
   'shape': 'cylinder',
   'material': 'rubber'},
  {'color': 'green',
   'size': 'small',
   'shape': 'cylinder',
   'material': 'metal'}],
 'relationships': {'right': [[], [0, 4], [0, 1, 4], [0, 1, 2, 4], [0]],
  'behind': [[], [0, 2, 3], [0], [0, 2], [0, 1, 2, 3]],
  'front': [[1, 2, 3, 4], [4], [1, 3, 4], [1, 4], []],
  'left': [[1, 2, 3, 4], [2, 3], [3], [], [1, 2, 3]]}}

In [65]:
# Show the text of the question currently under inspection.
# NOTE(review): the evaluation loop assigns `test_question` a value read from
# sample["question"], whereas this cell indexes it with 'question'; it appears
# to rely on a different, dict-valued assignment that is not visible here.
print(test_question['question'])

Are there an equal number of shiny and matte objects?


In [66]:
# Display the full record of the question under inspection.
# NOTE(review): the displayed value is a dict, so `test_question` here does not
# come from the evaluation loop's assignment; it depends on kernel state that
# is not visible in this notebook.
test_question

{'split': 'val',
 'image_filename': 'CLEVR_val_007329.png',
 'answer': 'yes',
 'question': 'Are there an equal number of shiny and matte objects?',
 'image_index': 7329}