# Simple MCTS
This notebook investigates using Monte Carlo Tree Search (MCTS) on a single static website where multiple actions must be performed in sequence to complete a task.

The task is to order a MacBook with a specific configuration. This website was chosen over others because the page does not change when elements are selected, which substantially simplifies testing because the website can simply be cached.

The start page is [this page](https://www.apple.com/shop/buy-mac/macbook-pro/14-inch-space-gray-apple-m3-chip-with-8-core-cpu-and-10-core-gpu-8gb-memory-512gb) with the prompt "order a macbook pro 14 with 24 gb ram, 2 tb, fast charging and all available software"

Command to run this:
```
python run_demo.py --task_name openended --model_name openai/gpt-4o-mini --start_url https://www.apple.com/shop/buy-mac/macbook-pro/14-inch-space-gray-apple-m3-chip-with-8-core-cpu-and-10-core-gpu-8gb-memory-512gb
```

## Setup and load cached website

In [477]:
# Start URL of the Apple Store configuration page the task runs on.
website = "https://www.apple.com/shop/buy-mac/macbook-pro/14-inch-space-gray-apple-m3-chip-with-8-core-cpu-and-10-core-gpu-8gb-memory-512gb"

# Natural-language task given to the agent.
human_prompt = "order a macbook pro 14 with 24 gb ram, 2 tb, fast charging and all available software"

In [570]:
# ideal actions are:
# Reference click sequence for the task, in the order it is scored against.
# The numbers are presumably element bids from the cached page observation
# (TODO confirm they still match the cached file).
ideal_actions = [
    "click(1007)",  # Select 24GB unified memory
    "click(1038)",  # Select 2TB SSD storage
    "click(1061)",  # Select 96W USB-C Power Adapter
    "click(1112)",  # Select Final Cut Pro software
    "click(1135)",  # Select Logic Pro software
    "click(1209)",  # Add to bag
]

# One action per line, used for printing next to model predictions.
ideal_actions_str = "\n".join(ideal_actions)

In [571]:
print(ideal_actions_str)

click('1007')
click('1038')
click('1061')
click('1112')
click('1135')
click('1209')


In [480]:
# Load the recorded agent log and split it into its three kinds of sections.
txt_file = "../output_example_2.txt"
with open(txt_file, 'r') as file:
    lines = file.readlines()

system_messages, prompts, actions = [], [], []

# Section header (without the trailing colon) -> list collecting that section's lines.
section_buckets = {
    "System Message": system_messages,
    "Prompt": prompts,
    "Action": actions,
}

current_section = None

for line in lines:
    # A header line only switches the active section; the header line itself is not stored.
    header = next((name for name in section_buckets if line.startswith(name + ":")), None)
    if header is not None:
        current_section = header
    elif current_section is not None:
        section_buckets[current_section].append(line)

# Keep only the text after the last "content='" marker of the first line of each section.
system_prompt = system_messages[0].split("content='")[-1].strip()
base_prompt = prompts[0].split("content=\'")[-1].strip()

In [481]:
import sys
sys.path.append("../demo_agent")
from agents.legacy.dynamic_prompting import Think, Memory, ActionSpace, Flags

# Prompt configuration for the legacy demo agent. The quoted comments are the
# flag descriptions; only `use_thinking` and `use_memory` are read directly in
# this notebook (as visibility switches for the prompt elements built below).
flags=Flags(
    use_html=True,
    use_ax_tree=True,
    use_thinking=True,  # Controls visibility of the Think prompt element created below.
    use_error_logs=True,  # "Prompt the agent with the error logs."
    use_memory=False,  # "Enables the agent with a memory (scratchpad)."
    use_history=True,
    use_diff=False,  # "Prompt the agent with the difference between the current and past observation."
    use_past_error_logs=True,  # "Prompt the agent with the past error logs."
    use_action_history=True,  # "Prompt the agent with the action history."
    multi_actions=True,
    action_space="bid",
    use_abstract_example=True,  # "Prompt the agent with an abstract example."
    use_concrete_example=True,  # "Prompt the agent with a concrete example."
    use_screenshot=False,
    enable_chat=True,
    demo_mode="default",
)

# Prompt elements whose `_parse_answer` methods are used by `parser` to pull
# the think / memory / action parts out of a raw model answer.
think = Think(visible=lambda: flags.use_thinking)
memory = Memory(visible=lambda: flags.use_memory)
action_space = ActionSpace(flags)

def parser(text_answer):
    """Extract the think / memory / action fields from a raw model answer.

    Each prompt element (``think``, ``memory``, ``action_space``) parses the
    answer in turn and its result is merged into one dict. If any of them
    raises, parsing stops and both ``'action'`` and ``'think'`` are set to
    ``None``.

    Returns:
        A 3-tuple ``(fields, True, "")``; the last two items are constant.
    """
    fields = {}
    try:
        for element in (think, memory, action_space):
            fields.update(element._parse_answer(text_answer))
    except Exception:
        # Best effort: signal an unparsable answer instead of failing the cell.
        fields['action'] = None
        fields['think'] = None

    return fields, True, ""

In [721]:
from langchain_openai import ChatOpenAI
from langchain.schema import HumanMessage, SystemMessage


# Read the OpenAI API key from ../openai_key.txt so it is not stored in the notebook.
with open("../openai_key.txt", "r") as file:
    api_key = file.read().strip()

# Chat model used by the cells below. logprobs=True is bound so that token
# log-probabilities are returned in `response_metadata['logprobs']`.
model = ChatOpenAI(
            model_name="gpt-4o-mini",
            temperature=0.1,#0.1,
            max_tokens=2_000,
            api_key=api_key
        ).bind(logprobs=True)

In [605]:
from langchain_groq import ChatGroq

# Read the Groq API key from ../groq_key.txt so it is not stored in the notebook.
with open("../groq_key.txt", "r") as file:
    groq_api_key = file.read().strip()

# NOTE(review): this rebinds `model`, replacing the ChatOpenAI instance created
# in the previous cell. Whichever of the two cells ran last decides which model
# the later cells use; run only one of them.
model = ChatGroq(
    model="Llama-3.1-8b-Instant",
    temperature=0.1,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    api_key=groq_api_key,
).bind(logprobs=True)

## MCTS Preliminaries: Build tree, verify trajectory

In [712]:
import re
from html.parser import HTMLParser

class HTMLCleaner(HTMLParser):
    """Strip an HTML document down to tags, ``bid`` attributes and text.

    Every attribute except ``bid`` is dropped, elements without any text
    content are removed entirely, and whitespace is collapsed. The cleaned
    markup is accumulated as string fragments in ``self.output``.

    Void elements (``<br>``, ``<input>``, ``<meta>``, ...) are skipped: they
    never carry content, and leaving them on the tag stack would swallow the
    content of the enclosing element because their end tag never arrives.
    When an end tag does not match the innermost open element, the unclosed
    inner elements are closed implicitly instead of losing their content.
    """

    # HTML elements that have no closing tag and therefore no content.
    VOID_TAGS = frozenset({
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
        'link', 'meta', 'param', 'source', 'track', 'wbr',
    })

    def __init__(self):
        super().__init__()
        self.output = []           # finished top-level fragments
        self.tag_stack = []        # (tag, bid_attr, index into output at open time)
        self.current_content = []  # one fragment list per open element

    def handle_starttag(self, tag, attrs):
        """Open a new element, remembering only its ``bid`` attribute."""
        if tag.lower() in self.VOID_TAGS:
            return

        bid_attr = next((attr for attr in attrs if attr[0] == 'bid'), None)
        self.tag_stack.append((tag, bid_attr, len(self.output)))
        self.current_content.append([])

    def handle_endtag(self, tag):
        """Close ``tag`` together with any unclosed elements nested inside it."""
        if tag.lower() in self.VOID_TAGS:
            return

        # A stray end tag that was never opened is ignored.
        if not any(open_tag == tag for open_tag, _, _ in self.tag_stack):
            return

        while self.tag_stack:
            if self._close_innermost() == tag:
                break

    def _close_innermost(self):
        """Pop the innermost open element, emit it if non-empty, return its tag."""
        start_tag, bid_attr, start_index = self.tag_stack.pop()
        content = ''.join(self.current_content.pop()).strip()

        if content:
            if bid_attr:
                self.output.insert(start_index, f'<{start_tag} bid="{bid_attr[1]}">')
            else:
                self.output.insert(start_index, f'<{start_tag}>')
            self.output.append(content)
            self.output.append(f'</{start_tag}>')

        # Hand the finished fragments over to the parent element, if any.
        if self.current_content:
            self.current_content[-1].extend(self.output[start_index:])
            del self.output[start_index:]

        return start_tag

    def handle_data(self, data):
        """Collect text, dropping newlines and collapsing runs of whitespace."""
        # Remove literal backslash-n sequences and actual newlines, then normalize spaces
        normalized_data = re.sub(r'(\\n|\n|\r)+', '', data)
        normalized_data = re.sub(r'\s+', ' ', normalized_data)
        if self.current_content:
            self.current_content[-1].append(normalized_data)
        else:
            self.output.append(normalized_data)

def clean_html(html_content):
    """Return a compact version of ``html_content`` produced by ``HTMLCleaner``.

    Args:
        html_content: HTML text; literal backslash-n sequences are treated as
            newlines.

    Returns:
        The cleaned markup as a single stripped string.
    """
    # Replace literal backslash-n sequences with actual newlines before parsing
    html_content = html_content.replace('\\n', '\n')
    cleaner = HTMLCleaner()
    cleaner.feed(html_content)
    # feed() may hold back trailing text (e.g. a dangling '&'); close() forces
    # the parser to process everything that is still buffered.
    cleaner.close()
    return ''.join(cleaner.output).strip()

In [708]:
# Section 4 of the "# "-delimited prompt is the one build_action_prompt treats
# as the HTML observation; compare its length before and after cleaning.
html = base_prompt.split("# ")[4]
c_html = clean_html(html)
len(html), len(c_html)

288727

In [713]:
# Replacement "Action space" prompt section that only advertises click(bid).
# Escapes are doubled so the text contains literal backslash-n sequences, the
# same form the recorded base prompt uses. The examples are restricted to
# click so the model is not nudged towards actions that are not offered.
simple_action_space = """Action space:\\n\\n1 type of actions are available.\\n\\nclick(bid: int)\\n    Description: Click an element.\\n    Examples:\\n        click(\\\'151\\\')\\n\\n    Multiple actions can be provided at once, but will be executed sequentially without any feedback from the page.\\nExample:\\nclick(\\\'a51\\\')\\nclick(\\\'48\\\')\\n\\n"""

def build_action_prompt(base_prompt, actions, action_thoughts, thoughts):
    """Rebuild the recorded prompt for a given action history.

    The prompt is split on "# " and three sections are rewritten:
    section 4 (the HTML observation, after dropping its first 7 characters)
    is replaced by cleaned HTML, section 6 gets the actions and thoughts
    inserted before its last 4 characters, and section 7 is replaced by
    ``simple_action_space``. A final instruction listing the actions is
    appended.

    Args:
        base_prompt: recorded prompt text with at least 8 "# " sections.
        actions: action strings taken so far.
        action_thoughts: per-action reasoning, paired with ``actions``.
        thoughts: overall thoughts collected so far.

    Returns:
        The rewritten prompt string.
    """
    sections = base_prompt.split("# ")

    # change html obs
    sections[4] = "HTML:" + clean_html(sections[4][7:])

    # Splice the history in front of the section's 4-character tail.
    history = sections[6]
    history_head, history_tail = history[:-4], history[-4:]
    annotated_actions = [" #".join((a, t)) for a, t in zip(actions, action_thoughts)]
    sections[6] = "".join([
        history_head,
        " Actions: [", ", ".join(annotated_actions),
        "]; Thoughts [", ", ".join(thoughts), "]",
        history_tail,
    ])

    sections[7] = simple_action_space

    final_instruction = "".join([
        " # Final Instruction: Given that the last actions are: ",
        ", ".join(actions),
        ", what would you do next? Do not pick an action that you already tried.",
    ])
    return "# ".join(sections) + final_instruction

In [714]:
# Smoke test of build_action_prompt with an arbitrary two-step history.
# NOTE: this rebinds `actions`, which previously held the "Action:" lines
# parsed from the recorded log.
actions = ['click(124)', 'click(12)']
action_thoughts = ["I will click on the 24 gb option", "I will click on the 2 tb option"]
thoughts = ["I will click on the 24 gb option", "I will click on the 2 tb option"]

new_prompt = build_action_prompt(base_prompt, actions, action_thoughts, thoughts)
# Show the individual "# "-delimited sections of the rebuilt prompt.
new_prompt.split("# ")

['',
 'Instructions\\n\\nYou are a UI Assistant, your goal is to help the user perform tasks using a web browser. You can\\ncommunicate with the user via a chat, in which the user gives you instructions and in which you\\ncan send back messages. You have access to a web browser that both you and the user can see,\\nand with which only you can interact via specific commands.\\n\\nReview the instructions from the user, the current state of the page and all other information\\nto find the best possible next action to accomplish your goal. Your answer will be interpreted\\nand executed by a program, make sure to follow the formatting instructions.\\n\\n#',
 'Chat messages:\\n\\n - [assistant] Hi! I am your UI assistant, I can perform web tasks for you. What can I help you with?\\n - [user] order a macbook pro 14 with 24 gb ram, 2 tb, fast charging and all available software\\n',
 'Observation of current step:\\n\\n#',
 'HTML:<head bid="1"><title bid="7">14-inch MacBook Pro - Space Gray - A

In [486]:
# ground truth
# One-shot attempt: with an empty history, ask the model to predict the whole
# action sequence at once.
actions = []
action_thoughts = []
thoughts = []

new_prompt = build_action_prompt(base_prompt, actions, action_thoughts, thoughts)
chat_messages = [
    SystemMessage(content=system_prompt),
    HumanMessage(content=new_prompt+"Think about what to do and then predict all actions at once to complete the task."),
]
out = model.invoke(chat_messages)
# parser returns a 3-tuple, so `ans_dict` is a tuple whose first item is the dict.
ans_dict = parser(out.content)

In [620]:
ans_dict

({'think': 'To order a MacBook Pro 14 with the specified requirements, I need to customize the laptop by selecting features such as RAM, storage, power adapter, and software packages. Currently, the page is displaying the default configuration options, and I need to change the RAM to 24GB, storage to 2TB, and select additional software options such as Final Cut Pro and Logic Pro.',
  'action': "fill('memory_aos_phantom_z1c8_065_cg1l_3', '')  # Select 24GB RAM\nfill('hard_drivesolid_state_drive_aos_phantom_z1c8_065_cg1p_3', '')  # Select 2TB SSD Storage\nclick('sw_final_cut_pro_z1c8_065_cg37_2')  # Select Final Cut Pro\nclick('sw_logic_pro_z1c8_065_cg39_2')  # Select Logic Pro\nclick('1208')  # Click add to cart button"},
 True,
 '')

In [523]:
out.response_metadata['logprobs']

{'content': [{'token': '<th',
   'bytes': [60, 116, 104],
   'logprob': -1.7120534e-05,
   'top_logprobs': []},
  {'token': 'ink',
   'bytes': [105, 110, 107],
   'logprob': -1.9361265e-07,
   'top_logprobs': []},
  {'token': '>\n',
   'bytes': [62, 10],
   'logprob': -0.00065048266,
   'top_logprobs': []},
  {'token': 'To',
   'bytes': [84, 111],
   'logprob': -0.5301656,
   'top_logprobs': []},
  {'token': ' order',
   'bytes': [32, 111, 114, 100, 101, 114],
   'logprob': -0.42212373,
   'top_logprobs': []},
  {'token': ' a',
   'bytes': [32, 97],
   'logprob': -0.7576712,
   'top_logprobs': []},
  {'token': ' Mac',
   'bytes': [32, 77, 97, 99],
   'logprob': -0.033206448,
   'top_logprobs': []},
  {'token': 'Book',
   'bytes': [66, 111, 111, 107],
   'logprob': -0.00080510264,
   'top_logprobs': []},
  {'token': ' Pro',
   'bytes': [32, 80, 114, 111],
   'logprob': -2.220075e-06,
   'top_logprobs': []},
  {'token': ' ', 'bytes': [32], 'logprob': -0.71397793, 'top_logprobs': []},
  {

In [487]:
print(f"Predicted:\n{ans_dict[0]['action']}")
print(f"\nIdeal:\n{ideal_actions_str}")

Predicted:
fill('memory_aos_phantom_z1c8_065_cg1l_3', '')  # Select 24GB RAM
fill('hard_drivesolid_state_drive_aos_phantom_z1c8_065_cg1p_3', '')  # Select 2TB SSD Storage
click('sw_final_cut_pro_z1c8_065_cg37_2')  # Select Final Cut Pro
click('sw_logic_pro_z1c8_065_cg39_2')  # Select Logic Pro
click('1208')  # Click add to cart button

Ideal:
click('1007') # Select 24GB unified memory
click('1038') # Select 2TB SSD storage
click('1061') # Select 96W USB-C Power Adapter
click('1112') # Select Final Cut Pro software
click('1135') # Select Logic Pro software
click('1209') # Add to bag


In [942]:
from typing import List
from langchain_core.pydantic_v1 import BaseModel, Field

# Structured-output schema for a single proposed step: the reasoning for the
# step plus the action string. Documented with comments rather than a
# docstring because the Field descriptions are runtime strings (presumably
# forwarded to the model as part of the schema).
class Action(BaseModel):
    think: str = Field(description="The goal of the next step to eventually accomplish the task")
    action: str = Field(description="The single action to accomplish the task. Should be click(<int>)")

# Structured-output schema for an expansion step: one overall thought plus a
# list of candidate next Actions. Each entry in `plan` becomes one child node
# when a tree node is expanded.
class Plan(BaseModel):
    think: str = Field(description="The overall goal and thoughts to accomplish the task")
    plan: List[Action] = Field(description="Possible actions to take next to get closer to the goal. Should have length of at least 7")

# structured_llm proposes several candidate actions (Plan); greedy_llm proposes
# exactly one (Action). With include_raw=True, invoke() returns a dict: later
# cells read the raw message from ['raw'] and the parsed model from ['parsed'].
structured_llm = model.with_structured_output(Plan, include_raw=True)
greedy_llm = model.with_structured_output(Action, include_raw=True)

In [723]:
def expand_predict(action_prompt):
    """Query ``structured_llm`` (Plan schema) with the system prompt and ``action_prompt``.

    Returns:
        Whatever ``structured_llm.invoke`` returns for the two-message chat.
    """
    return structured_llm.invoke([
        SystemMessage(content=system_prompt),
        HumanMessage(content=action_prompt),
    ])

def greedy_predict(action_prompt):
    """Query ``greedy_llm`` (Action schema) with the system prompt and ``action_prompt``.

    Returns:
        Whatever ``greedy_llm.invoke`` returns for the two-message chat.
    """
    return greedy_llm.invoke([
        SystemMessage(content=system_prompt),
        HumanMessage(content=action_prompt),
    ])

In [468]:
actions = []
action_thoughts = []
thoughts = []

new_prompt = build_action_prompt(base_prompt, actions, action_thoughts, thoughts)

In [469]:
expand_answer = expand_predict(new_prompt)
greedy_answer = greedy_predict(new_prompt)

In [470]:
# Both runnables are built with include_raw=True, so invoke() returns a dict
# and the parsed Plan / Action object lives under the 'parsed' key.
print([p.action for p in expand_answer['parsed'].plan])
print(greedy_answer['parsed'].action)

['click(1011)', 'click(1039)', 'click(1111)']
click(1006)


In [490]:
class Node:
    """Search-tree node holding the trajectory that leads to it.

    Attributes are set directly from the constructor arguments; ``children``
    starts empty, ``visits`` starts at 1 and ``value`` at 0.1.
    """

    def __init__(self, actions, action_thoughts, thoughts, parent=None, depth=0):
        # Position in the tree.
        self.parent, self.depth = parent, depth
        self.children = []

        # Trajectory from the root up to and including this node.
        self.actions, self.action_thoughts, self.thoughts = actions, action_thoughts, thoughts

        # Statistics: visits starts at 1 to avoid division by 0, so a node
        # never gets an infinite score.
        self.visits, self.value = 1, 0.1

In [491]:
import copy
def append_children(node, n):
    """Recursively grow the tree below ``node`` by ``n`` levels.

    At every level the model is asked for a Plan given the node's history and
    one child is created per proposed action. Every created child triggers
    another model call until ``n`` levels are built, so the number of calls
    grows exponentially with ``n``.

    Args:
        node: node to expand in place (its ``children`` list is extended).
        n: number of levels to add; values <= 0 add nothing.
    """
    if n <= 0:
        return
    action_prompt = build_action_prompt(base_prompt, node.actions, node.action_thoughts, node.thoughts)

    answer = expand_predict(action_prompt)
    # structured_llm is created with include_raw=True, in which case the
    # result is a dict and the Plan object is stored under 'parsed'.
    parsed = answer['parsed'] if isinstance(answer, dict) else answer
    if parsed is None:
        # The model output could not be parsed into a Plan; leave a leaf.
        return

    new_thought = parsed.think
    for action in parsed.plan:
        # `+` builds fresh lists of strings, so children never share state
        # with their parent and no deep copy is needed.
        child = Node(node.actions + [action.action],
                     node.action_thoughts + [action.think],
                     node.thoughts + [new_thought],
                     parent=node,
                     depth=node.depth + 1)
        node.children.append(child)
        append_children(child, n - 1)

In [459]:
# Build a full tree, 4 levels deep, below an empty root. append_children calls
# the model once per expanded node, so this cell issues many requests.
actions = []
action_thoughts = []
thoughts = []

root = Node(actions, action_thoughts, thoughts)
append_children(root, 4)

In [836]:
def print_tree(node, indent=""):
    """Print the subtree rooted at ``node``, indenting two spaces per level.

    For every node with at least one action, its last action and value are
    shown, followed by the number of children (shown for all nodes).
    """
    report = []
    if node.actions:
        report.append(f"{indent}Action: {node.actions[-1]}")
        report.append(f"{indent}Value: {node.value}")
    report.append(f"{indent}Children: {len(node.children)}")
    print("\n".join(report))

    child_indent = indent + "  "
    for child in node.children:
        print_tree(child, child_indent)

print_tree(root)

Children: 5
  Action: click(1006)
  Value: 0.1
  Children: 5
    Action: click(1006)
    Value: 0.1
    Children: 4
      Action: click(1007)
      Value: 0.1
      Children: 5
        Action: click(1007)
        Value: 0.1
        Children: 4
          Action: click(1007)
          Value: 0.1
          Children: 5
            Action: click(1007)
            Value: 0.1
            Children: 0
            Action: click(1039)
            Value: 0.1
            Children: 0
            Action: click(1061)
            Value: 0.1
            Children: 0
            Action: click(1209)
            Value: 0.1
            Children: 0
            Action: click(1204)
            Value: 0.1
            Children: 0
          Action: click(1039)
          Value: 0.1
          Children: 0
          Action: click(1061)
          Value: 0.1
          Children: 0
          Action: click(1112)
click(1136)
          Value: 0.1
          Children: 0
        Action: click(1039)
        Value: 0.1
        Ch

In [464]:
# create all possible trajectories of actions from tree
def get_trajectories(node):
    """Enumerate every root-to-leaf path below ``node``.

    Returns:
        A list of paths. Each path is a list with the ``actions`` list of
        every node on the way from ``node`` down to one leaf, in that order.
    """
    if not node.children:
        return [[node.actions]]

    return [
        [node.actions] + tail
        for child in node.children
        for tail in get_trajectories(child)
    ]

# Each path ends with the leaf's own `actions` list, which already holds the
# complete action sequence from the root, so keep only that last entry.
trajectories = get_trajectories(root)
trajectories = [t[-1] for t in trajectories]

## Build MCTS

In [856]:
class Node:
    """Search-tree node used by the MCTS cells.

    Replaces the ``Node`` class defined earlier in the notebook; the only
    difference is the smaller initial ``value`` (0.01 instead of 0.1).
    """

    def __init__(self, actions, action_thoughts, thoughts, parent=None, depth=0):
        # Position in the tree.
        self.parent, self.depth = parent, depth
        self.children = []

        # Trajectory from the root up to and including this node.
        self.actions, self.action_thoughts, self.thoughts = actions, action_thoughts, thoughts

        # Statistics: visits starts at 1 to avoid division by 0, so a node
        # never gets an infinite score.
        self.visits, self.value = 1, 0.01

In [471]:
# greedy baseline
# Repeatedly ask the model for its single best next action and append it to
# one growing trajectory (no search, no backtracking).
max_depth = 6

actions = []
action_thoughts = []
thoughts = []

root = Node(actions, action_thoughts, thoughts)

for _ in range(max_depth):
    answer = greedy_predict(build_action_prompt(base_prompt, root.actions, root.action_thoughts, root.thoughts))
    # greedy_llm is built with include_raw=True, so the Action object is
    # stored under 'parsed' (None when the output could not be parsed).
    step = answer['parsed']
    if step is None:
        break
    root.action_thoughts += [step.think]
    root.actions += [step.action]
print(root.actions)

['click(1006)', 'click(1038)', 'click(1066)', 'click(1029)', 'click(1054)', 'click(1100)\nclick(1123)']


In [628]:
import time
# get logprobs
# Time a single expansion call from an empty root; the raw response (including
# its logprobs metadata) is inspected in the following cells.
actions = []
action_thoughts = []
thoughts = []

root = Node(actions, action_thoughts, thoughts)

start_time = time.time()
answer = expand_predict(build_action_prompt(base_prompt, root.actions, root.action_thoughts, root.thoughts))
print(f"Time taken: {time.time() - start_time}")

Time taken: 18.054744005203247


In [629]:
answer

{'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_joDR7wJnEBImg6TZMwJl42rB', 'function': {'arguments': '{"think":"I need to customize the MacBook Pro with the specified configurations: 24 GB RAM, 2 TB storage, fast charging, and all available software. The current page allows customization, but I need to select the appropriate options for RAM, storage, and software. I will first look for the memory options to select 24 GB RAM.","plan":[{"think":"Select the memory option for 24 GB RAM.","action":"click(1006)"},{"think":"Select the storage option for 2 TB SSD.","action":"click(1038)"},{"think":"Look for the power adapter options to ensure fast charging.","action":"click(1047)"},{"think":"Select the keyboard language option if needed.","action":"click(1070)"},{"think":"Look for the software options to select all available software.","action":"click(1096)"}]}', 'name': 'Plan'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 181, 'p

In [630]:
answer['raw'].response_metadata['logprobs']

In [631]:
answer['parsed'].plan

[Action(think='Select the memory option for 24 GB RAM.', action='click(1006)'),
 Action(think='Select the storage option for 2 TB SSD.', action='click(1038)'),
 Action(think='Look for the power adapter options to ensure fast charging.', action='click(1047)'),
 Action(think='Select the keyboard language option if needed.', action='click(1070)'),
 Action(think='Look for the software options to select all available software.', action='click(1096)')]

In [908]:
import math
import random

def select(node: Node, alpha: float = 1.0) -> Node:
    """Descend from ``node`` to a leaf, taking the best-UCB child at each level.

    Args:
        node: node to start from.
        alpha: exploration weight passed on to ``ucb_select``.

    Returns:
        The first node on the path that has no children (``node`` itself if
        it is already a leaf).
    """
    while node.children:
        node = ucb_select(node, alpha)
    return node

def ucb_select(node: Node, alpha: float = 1.0) -> Node:
    """Pick the child of ``node`` with the highest UCB score.

    Ties between equally scored children are broken uniformly at random.
    """
    scored = [(ucb_score(child, node.visits, alpha), child) for child in node.children]
    top_score = max(score for score, _ in scored)
    return random.choice([child for score, child in scored if score == top_score])

def ucb_score(node: Node, total_visits: int, alpha: float = 1.0) -> float:
    """UCB score of ``node``: mean value plus an exploration bonus.

    Args:
        node: node to score.
        total_visits: visit count of the parent.
        alpha: weight of the exploration term.

    Returns:
        ``inf`` for a node with zero visits, otherwise
        ``value / visits + alpha * sqrt(2 * ln(total_visits) / visits)``.
    """
    visits = node.visits
    if visits == 0:
        return float('inf')
    exploitation = node.value / visits
    exploration = math.sqrt(2 * math.log(total_visits) / visits)
    return exploitation + alpha * exploration

def expand(node: Node) -> None:
    """Add one child to ``node`` for every action the model proposes.

    The model is prompted with the history stored on ``node`` itself, so the
    proposals depend on the actions already taken along this branch. Nothing
    is returned; ``node.children`` is extended in place. If the model output
    cannot be parsed into a Plan, the node is left without new children.
    """
    # this should be taking an action and getting to a new state
    answer = expand_predict(build_action_prompt(base_prompt, node.actions, node.action_thoughts, node.thoughts))
    parsed_answer = answer['parsed']
    if parsed_answer is None:
        return

    new_thought = parsed_answer.think
    for action in parsed_answer.plan:
        # `+` builds fresh lists of strings, so children never share state
        # with their parent and no deep copy is needed.
        child = Node(node.actions + [action.action],
                     node.action_thoughts + [action.think],
                     node.thoughts + [new_thought],
                     parent=node,
                     depth=node.depth + 1)
        node.children.append(child)

def backpropagate(node: Node, reward: float):
    """Add ``reward`` and one visit to ``node`` and to each of its ancestors."""
    cursor = node
    while cursor:
        cursor.visits, cursor.value = cursor.visits + 1, cursor.value + reward
        cursor = cursor.parent

def verify_success(actions):
    """Return True only when ``actions`` equals ``ideal_actions`` exactly."""
    return bool(actions == ideal_actions)

def best_child(node: Node) -> Node:
    """Return the most visited child of ``node``.

    Among children with the same (highest) visit count, the one with the
    largest value wins; remaining ties go to the earliest child.
    """
    return max(node.children, key=lambda child: (child.visits, child.value))

# Sanity check: with a parent visit count of 1, log(1) = 0 removes the
# exploration term, so the score is just value / visits of a fresh node.
node = Node([], [], [])
ucb_score(node, 1)

0.01

In [738]:
# Normalise the reference actions to the unquoted form, e.g. click(1007).
ideal_actions = [a.replace("'", "") for a in ideal_actions]
# Bid = text between the parentheses. Taken from the already unquoted strings,
# so this also works when the cell is re-run (splitting on "'" would raise
# IndexError once the quotes are gone).
ideal_bids = [a.partition("(")[2].rstrip(")") for a in ideal_actions]

In [924]:
def get_t_value(trajectory, reference_actions=None):
    """Score a trajectory by the length of its correct prefix.

    Actions are compared position by position with the reference sequence
    and counting stops at the first mismatch. The input list is not
    modified.

    Args:
        trajectory: list of action strings to score.
        reference_actions: sequence to compare against; defaults to the
            notebook-level ``ideal_actions``.

    Returns:
        Number of leading matches divided by the length of the reference,
        i.e. a float in [0, 1]; 0.0 when the reference is empty.
    """
    if reference_actions is None:
        reference_actions = ideal_actions
    if not reference_actions:
        return 0.0

    # FIXME this is a hacky way to handle the fact that the bids are not assigned nicely
    bid_aliases = {
        'click(1006)': 'click(1007)',
        'click(1039)': 'click(1038)',
    }
    # Work on a normalised copy so the caller's list (for example a node's
    # `actions`) keeps the actions the model actually predicted.
    normalized = [bid_aliases.get(action, action) for action in trajectory]

    correct_actions = 0.
    for predicted, expected in zip(normalized, reference_actions):
        if predicted != expected:
            break
        correct_actions += 1

    return correct_actions / len(reference_actions)

t = ["click(1007)", "click(1038)", "click(1061)", "click(1135)", "click(1209)"]
get_t_value(ideal_actions), get_t_value(t)

(1.0, 0.5)

In [925]:
ideal_actions

['click(1007)',
 'click(1038)',
 'click(1061)',
 'click(1112)',
 'click(1135)',
 'click(1209)']

In [928]:
# t = ['click(1007)', 'click(1039)', 'click(1100)', 'click(1119)', 'click(1141)']
t = ['click(1007)', 'click(1039)', 'click(1061)', 'click(1112)\nclick(1136)']
get_t_value(t)

0.5

In [944]:
from tqdm import tqdm

# get initial options
# Search budget and exploration weight for the tree policy.
max_depth = 6
max_iters = 100
alpha = 0.5

actions = []
action_thoughts = []
thoughts = []

# TODO change node to contain one action per node
root = Node(actions, action_thoughts, thoughts)

for iter_idx in tqdm(range(max_iters)):
    # selection
    # - TODO figure out how to do this proportional to logprobs
    # Walk from the root to a leaf, following the best UCB score at each level.
    selected_node = select(root, alpha)
    # print(f"Selected node: {selected_node.actions}")

    # expansion + simulation
    # Number of further actions allowed before the depth limit is reached.
    remaining_depth = max_depth - selected_node.depth

    # Score of the selected leaf; used as the reward if no rollout happens.
    value = get_t_value(selected_node.actions)

    if remaining_depth > 0:
        for _ in range(remaining_depth):
            # Each step calls the model once to create children, then moves
            # to one of them. The nodes created here stay in the tree.
            expand(selected_node)
            # NOTE(review): this call uses select's default alpha (1.0), not
            # the `alpha` configured above -- confirm this is intended.
            selected_node = select(selected_node)
            value = get_t_value(selected_node.actions)
            # Stop early once the full reference sequence is matched.
            if value == 1.0:
                break

    # backpropagation
    # The reward is added to the last node reached and all of its ancestors.
    # print(f"Selected node: {selected_node.actions}")
    # print(f"Value: {value}")
    backpropagate(selected_node, value)

100%|██████████| 100/100 [06:41<00:00,  4.02s/it]


In [930]:
print(ideal_actions)

['click(1007)', 'click(1038)', 'click(1061)', 'click(1112)', 'click(1135)', 'click(1209)']


In [947]:
# print_tree(root)

In [948]:
[c.actions for c in root.children]

[['click(1007)'],
 ['click(1038)'],
 ['click(1100)'],
 ['click(1112)'],
 ['click(1136)'],
 ['click(1209)'],
 ['click(849)']]

In [945]:
# Follow the most visited child from the root down to a leaf. At every level
# print, for the children of the current node, their last action, accumulated
# value and visit count; finally print the leaf's full action sequence.
best_c = best_child(root)
while best_c.children:
    print([c.actions[-1] for c in best_c.children])
    print([c.value for c in best_c.children])
    print([c.visits for c in best_c.children])
    best_c = best_child(best_c)
print(best_c.actions)

['click(1007)', 'click(1038)', 'click(1049)', 'click(1062)', 'click(1098)', 'click(1112)', 'click(1136)']
[0.3433333333333333, 33.67666666666666, 0.3433333333333333, 0.3433333333333333, 0.3433333333333333, 0.3433333333333333, 0.3433333333333333]
[3, 77, 3, 3, 3, 3, 3]
['click(1007)', 'click(1038)', 'click(1061)', 'click(1112)', 'click(1118)', 'click(1141)', 'click(1209)']
[1.3433333333333333, 1.01, 25.009999999999998, 0.6766666666666666, 3.0100000000000002, 1.01, 1.6766666666666665]
[5, 4, 51, 3, 10, 4, 6]
['click(1006)', 'click(1038)', 'click(1062)', 'click(1100)', 'click(1112)', 'click(1136)', 'click(1209)']
[0.01, 24.509999999999998, 0.01, 0.01, 0.01, 0.01, 0.51]
[1, 50, 1, 1, 1, 1, 2]
['click(1006)', 'click(1039)', 'click(1061)', 'click(1100)', 'click(1112)', 'click(1136)', 'click(1209)']
[0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 24.509999999999998]
[1, 1, 1, 1, 1, 1, 50]
['click(1006)', 'click(1038)', 'click(1062)', 'click(1100)', 'click(1114)', 'click(1138)', 'click(1209)']
[0.01, 24.

In [921]:
ideal_actions

['click(1007)',
 'click(1038)',
 'click(1061)',
 'click(1112)',
 'click(1135)',
 'click(1209)']