# Demystifying telemetry.sqlite artifact

When you run with logging enabled, a database file is generated in SQLite format.

See https://microsoft.github.io/autogen/docs/Use-Cases/enhanced_inference/#logging for more info.

In this notebook we will only use two tables, chat_completions and events.

## Prereq

In [None]:
%pip install jsondiff
%pip install ipywidgets
%pip install markdown

## Load database, and do some data wrangling

In [None]:
import sqlite3
from os import path


def load_db(db_base_path: str, question_id: int, run_id: int = 0):
    """Open the telemetry database recorded for one question and run.

    The file is looked up at
    ``<db_base_path>/<question_id>/<run_id>/telemetry.sqlite``.

    Returns:
        The connection object produced by ``sqlite3.connect`` for that path.
    """
    run_directory = path.join(db_base_path, str(question_id), str(run_id))
    telemetry_file = path.join(run_directory, "telemetry.sqlite")
    return sqlite3.connect(telemetry_file)

In [None]:
import json
from datetime import datetime


# this func gets the image content and returns it - it also removes it from the json to avoid having duplicate data
def get_first_image_from_event(event):
    """Return the first inline image carried by ``event`` and blank it out in place.

    Supported event types and where their content is read from:

    - ``received_message``: ``event["json_state"]["message"]["content"]``
    - ``request_to_oai_cc``: the ``content`` of each entry in
      ``event["request"]["messages"]``, searched in order
    - ``reply_func_executed``: ``event["json_state"]["reply"]["content"]``

    The first ``image_url`` part found has its url replaced by
    ``"<image_placeholder>"`` so the image data is not kept twice.

    Returns:
        Everything after the first comma of the image url (the payload of the
        data URL), or ``None`` when no ``image_url`` part is present.

    Raises:
        Exception: if the event type is not one of the three listed above.
    """

    def subcontent_pass(content):
        # Only a list of parts can hold an image; plain-string content cannot.
        if not isinstance(content, list):
            return None
        for subcontent in content:
            if isinstance(subcontent, dict) and subcontent.get("type") == "image_url":
                # keep only what follows the "data:...;base64," header
                image_url = subcontent["image_url"]["url"].split(",", 1)[1]
                subcontent["image_url"]["url"] = "<image_placeholder>"
                return image_url
        return None

    event_name = event["event_name"]
    if event_name == "received_message":
        return subcontent_pass(event["json_state"]["message"]["content"])
    if event_name == "request_to_oai_cc":
        # Keep scanning: the image may live in a later message than the first
        # one whose content is a list.
        for message in event["request"]["messages"]:
            image_url = subcontent_pass(message.get("content"))
            if image_url is not None:
                return image_url
        return None
    if event_name == "reply_func_executed":
        return subcontent_pass(event["json_state"]["reply"]["content"])

    raise Exception(f"image detected in event type {event['event_name']} but not loaded.")


# load table events and do some processing
def db_events_to_list(conn):
    """Read every row of the ``events`` table into a list of dicts.

    Each dict maps column name to value, with these adjustments:

    - ``image_content``: whether the raw ``json_state`` text contains
      ``"data:image/png;base64"``
    - ``json_state``: parsed from its JSON text
    - ``image``: set only for rows with image content, via
      ``get_first_image_from_event``
    - ``timestamp``: parsed into a ``datetime``
    """
    cursor = conn.cursor()
    cursor.execute("SELECT * FROM events")
    records = cursor.fetchall()
    column_names = [column[0] for column in cursor.description]

    events = []
    for record in records:
        events.append(dict(zip(column_names, record)))

    for entry in events:
        raw_state = entry["json_state"]
        has_image = "data:image/png;base64" in raw_state
        entry["image_content"] = has_image
        entry["json_state"] = json.loads(raw_state)
        if has_image:
            entry["image"] = get_first_image_from_event(entry)
        entry["timestamp"] = datetime.strptime(entry["timestamp"], "%Y-%m-%d %H:%M:%S.%f")
    return events


# load table chat_completions and do some processing
def db_cc_to_list(conn):
    """Read every row of the ``chat_completions`` table into a list of dicts.

    Each dict maps column name to value, with these adjustments so the rows
    can be handled alongside rows of the ``events`` table:

    - ``image_content``: whether the raw ``request`` text contains
      ``"data:image/png;base64"``
    - ``request``: parsed from its JSON text
    - ``start_time`` / ``end_time``: parsed into ``datetime`` objects
    - ``timestamp``: same value as the parsed ``start_time``
    - ``total_seconds``: whole seconds elapsed between start and end
    - ``event_name`` / ``source_name``: fixed to ``"request_to_oai_cc"`` and
      ``"chat_completions_table"``
    - ``image``: set only for rows with image content, via
      ``get_first_image_from_event``
    """
    timestamp_format = "%Y-%m-%d %H:%M:%S.%f"

    c = conn.cursor()
    c.execute("SELECT * FROM chat_completions")
    rows = c.fetchall()
    colnames = [col[0] for col in c.description]

    final = [dict(zip(colnames, row)) for row in rows]
    for e in final:
        e["image_content"] = "data:image/png;base64" in e["request"]
        e["request"] = json.loads(e["request"])
        started = datetime.strptime(e["start_time"], timestamp_format)
        finished = datetime.strptime(e["end_time"], timestamp_format)
        e["timestamp"] = started
        e["start_time"] = started
        e["end_time"] = finished
        # timedelta.seconds is only the seconds *component* (days are dropped and
        # negative deltas wrap around); total_seconds() is the real duration.
        e["total_seconds"] = int((finished - started).total_seconds())
        e["event_name"] = "request_to_oai_cc"
        e["source_name"] = "chat_completions_table"
        if e["image_content"]:
            e["image"] = get_first_image_from_event(e)
    return final

# Brief explanation

The logs collected by scenario.py in https://github.com/microsoft/autogen/blob/ct_webarena/samples/tools/autogenbench/scenarios/WebArena/Templates/TwoAgents/scenario.py

generate specific events.

In this section we split the list of events into groups. First we split on 'source_name' (events emitted by scenario.py), so we can keep track of the overall sections inside scenario.py [login phase, setup, actual run, etc.]. Then we split each of those groups again on events that have images, which simplifies rendering events together with the last screenshot taken.

In [None]:
import heapq


def remove_noisy_events(events):
    """Return the events worth rendering, in their original order.

    Dropped:

    - ``reply_func_executed`` events unless their ``json_state`` is marked
      ``final`` and carries a non-``None`` ``reply``
    - ``mws_response``, ``Response`` and ``mws_request`` events
    """
    always_dropped = ("mws_response", "Response", "mws_request")

    kept = []
    for candidate in events:
        name = candidate["event_name"]
        if name == "reply_func_executed":
            # only the reply function that actually produced the final reply is interesting
            if not (candidate["json_state"]["final"] and candidate["json_state"]["reply"] is not None):
                continue
        elif name in always_dropped:
            continue
        kept.append(candidate)
    return kept


def split_on_event(data: list, func, tag: str):
    """Cut ``data`` into groups, starting a new group at every item matching ``func``.

    Each group is a dict with two keys:

    - ``tag``: the matching item that opened the group (``None`` for items
      that appear before the first match)
    - ``"inner"``: the non-matching items that followed it

    A group whose opener is truthy but has no followers gets a single ``{}``
    placeholder in ``"inner"``; a group with neither followers nor a truthy
    opener is omitted.
    """
    groups = []

    def close(group):
        members = group["inner"]
        if len(members) == 0 and not group[tag]:
            return
        if len(members) == 0:
            # placeholder so every emitted group has at least one inner entry
            members.append({})
        groups.append(group)

    pending = {"inner": [], tag: None}
    for entry in data:
        if not func(entry):
            pending["inner"].append(entry)
            continue
        close(pending)
        pending = {"inner": [], tag: entry}

    close(pending)
    return groups


def get_all_events(conn, filter=True):
    """Return rows of both tables as one list ordered by ``timestamp``.

    Args:
        conn: connection passed to ``db_events_to_list`` and ``db_cc_to_list``.
        filter: when true, the combined list goes through
            ``remove_noisy_events`` before being returned.

    Returns:
        A list of event dicts sorted by their ``timestamp`` value; on equal
        timestamps, rows from ``events`` come before ``chat_completions`` rows.
    """
    db_events = db_events_to_list(conn)
    openai_requests = db_cc_to_list(conn)
    # The rows are selected without ORDER BY, so neither list is guaranteed to
    # be sorted by timestamp. A stable sort gives the same result as merging two
    # sorted lists, and is still correct when they are not sorted.
    merged = sorted(db_events + openai_requests, key=lambda x: x["timestamp"])
    if filter:
        return remove_noisy_events(merged)
    return merged


def get_relevant_events(conn):
    """Build the two-level structure rendered by the UI.

    The filtered events are first grouped on events whose ``source_name`` is
    ``"scenario.py"`` (stored under ``"scenariopy"``). Each group's ``"inner"``
    list is then grouped again on events that have an ``image`` (stored under
    ``"image_holder"``).
    """

    def from_scenario(candidate):
        return candidate["source_name"] == "scenario.py"

    def carries_image(candidate):
        return candidate.get("image", None) is not None

    phases = split_on_event(get_all_events(conn, filter=True), from_scenario, tag="scenariopy")

    # the ui has a preference to traverse based on events that have an image
    for phase in phases:
        phase["inner"] = split_on_event(phase["inner"], carries_image, tag="image_holder")

    return phases

In [None]:
def sanity_check(merged_and_nested):
    """Print the container types and dict keys found at each nesting level.

    Only the first element of every level is inspected.
    """
    print("outer", type(merged_and_nested))

    first_phase = merged_and_nested[0]
    print("one run keys", first_phase.keys())

    image_groups = first_phase["inner"]
    print("inner one run", type(image_groups))

    first_group = image_groups[0]  # one phase inside scenario.py
    print("one section keys", first_group.keys())

    first_event = first_group["inner"][0]
    print("one single event", first_event.keys())


# sanity_check(merged_and_nested)

# render util funcs (event -> md/str)

In [None]:
def content_md(content, msg_num):
    """Render one message's content as markdown.

    - a string becomes a single ``message <msg_num>`` section
    - a list becomes one section per ``text`` or ``image_url`` part; parts of
      any other type are skipped
    - anything else yields a fixed reminder string
    """
    if isinstance(content, str):
        return f"**message {msg_num}**:\n\n {content}\n\n"

    if not isinstance(content, list):
        return "UPDATE THIS TO HANDLE OTHER TYPES OF CONTENTS"

    sections = []
    for part_num, part in enumerate(content):
        kind = part.get("type")
        if kind == "text":
            sections.append(f'**message {msg_num} - part {part_num}/{len(content)-1}**:\n\n {part["text"]}\n\n')
        elif kind == "image_url":
            sections.append(f"**message {msg_num} - part {part_num}/{len(content)-1}**:\n\n *see image below*\n\n")
    return "".join(sections)


def oai_request_event_md(event, prefix=""):
    """Render a chat-completion request event as markdown.

    The header lists timestamp, cache flag, cost and duration. The request's
    messages follow in reverse order (last message first), each rendered with
    ``content_md``.
    """
    header = "\n".join(
        (
            f"###{prefix}openai api request",
            f'- timestamp: **{event["timestamp"]}**',
            f'- is_cached: **{event["is_cached"]}**',
            f'- cost: **{event["cost"]}**',
            f'- total_seconds: **{event["total_seconds"]}**',
            "",
            "",
        )
    )

    rendered = []
    for position, message in enumerate(event["request"]["messages"]):
        rendered.append(content_md(message.get("content", "no_content_message?"), position))

    return header + "".join(reversed(rendered))


def reply_func_event_md(event, prefix=""):
    """Render a ``reply_func_executed`` event as markdown.

    The header shows the source, agent class, timestamp, reply function name
    and ``final`` flag. The reply follows, rendered with ``content_md``.

    The reply may be a plain string, or a dict holding a ``content`` entry.

    Raises:
        Exception: if the reply is neither of those shapes.
    """
    output = f"""###{prefix}reply function executed inside {event["source_name"]} [{event["agent_class_name"]}]
- timestamp: **{event["timestamp"]}**
- func: **{event["json_state"]["reply_func_name"]}**
- final: **{event["json_state"]["final"]}**


"""
    reply = event["json_state"]["reply"]
    # Test for str first: on a string, `"content" in reply` is a substring
    # search, and indexing the string with "content" would then fail.
    if isinstance(reply, str):
        content = reply
    elif isinstance(reply, dict) and "content" in reply:
        content = reply["content"]
    else:
        raise Exception("unknown content type")
    output += content_md(content, 0)

    return output


def received_msg_md(event, prefix=""):
    """Render a ``received_message`` event as markdown.

    The header shows receiver and sender (with their classes where available)
    and the timestamp. The message follows, rendered with ``content_md``.

    The message may be a plain string, or a dict holding a ``content`` entry.

    Raises:
        Exception: if the message is neither of those shapes.
    """
    output = f"""###{prefix}{event["source_name"]} received message from {event["json_state"]["sender"]}
- timestamp: **{event["timestamp"]}**
- received by: **{event["source_name"]}** type: **{event["agent_class_name"]}**
- sent by: **{event["json_state"]["sender"]}** type: **{event["json_state"].get("sender_class", "not_available")}**


"""
    message = event["json_state"]["message"]
    # Test for str first: on a string, `"content" in message` is a substring
    # search, and indexing the string with "content" would then fail.
    if isinstance(message, str):
        content = message
    elif isinstance(message, dict) and "content" in message:
        content = message["content"]
    else:
        raise Exception("unknown content type")
    output += content_md(content, 0)
    return output


def browser_action_md(event, prefix=""):
    """Render a ``browser_action`` event (timestamp, action, target, args) as markdown."""
    lines = (
        f"###{prefix}browser action",
        f'- timestamp: **{event["timestamp"]}**',
        f'- action: **{event["json_state"]["action"]}**',
        f'- target: **{event["json_state"]["target"]}**',
        f'- args: **{event["json_state"]["arg"]}**',
        "",  # keeps the trailing newline
    )
    return "\n".join(lines)


def viewport_state_md(event, prefix=""):
    """Render a ``viewport_state`` event (timestamp, percent visible/scrolled) as markdown."""
    lines = (
        f"###{prefix}viewport state",
        f'- timestamp: **{event["timestamp"]}**',
        f'- percent visible: **{event["json_state"]["percent_visible"]}**',
        f'- percent scrolled: **{event["json_state"]["percent_scrolled"]}**',
        "",  # keeps the trailing newline
    )
    return "\n".join(lines)


def exception_thrown_lambda_md(event, prefix=""):
    """Render an ``exception_thrown_lambda`` event as markdown.

    The known "POST data is not a valid JSON object" exception is rendered as
    a bare ``NoneEvent`` heading; any other exception shows its message.
    """
    exc_message = event["json_state"]["exc_message"]
    # drop known exception - this one happens when the POST data is not a valid JSON object
    if "POST data is not a valid JSON object" in exc_message:
        return f"###{prefix}NoneEvent"
    # (a stray trailing apostrophe used to be emitted after the message)
    return f"""###{prefix}exception thrown lambda
- {exc_message}
"""


def outermost_event_md(event):
    """Render the event that opens a phase as a markdown bullet list.

    ``final_answer`` and ``final_score`` events get an extra leading bullet
    with the answer/score. ``exception_thrown`` events show type, message and
    traceback (in a fenced block); every other event shows its name,
    timestamp and source.
    """
    if event is None:
        return "*no outermost event*"

    name = event["event_name"]
    pieces = []

    if name == "final_answer":
        pieces.append(f'- answer: **{event["json_state"]["final_answer"]}**\n')
    elif name == "final_score":
        pieces.append(f'- score: **{event["json_state"]["final_score"]}**\n')

    if name == "exception_thrown":
        state = event["json_state"]
        traceback_text = "\n".join(state["exc_traceback"])
        pieces.append(
            f"- exception type: **{state['exc_type']}**\n"
            f"    - exception message: **{state['exc_message']}**\n"
            "    - exception traceback:\n"
            "```\n"
            f"{traceback_text}\n"
            "```\n"
        )
    else:
        pieces.append(
            f"- outermost event: **{name}**\n"
            f'    - timestamp: **{event["timestamp"]}**\n'
            f'    - inside: **{event["source_name"]}**\n'
            "    "
        )

    return "".join(pieces)


def event_to_raw_markdown(event, event_num=None):
    """Render any event as raw markdown by dispatching on its ``event_name``.

    Args:
        event: the event dict, or ``None`` when there is nothing to render.
        event_num: optional number shown as ``"<n>) "`` in front of the title.

    Returns:
        The markdown for the event; events without a dedicated renderer get a
        "view not implemented" heading.
    """
    if event is None:
        return "*no inner event with image*"

    prefix = "" if event_num is None else f"{event_num}) "

    # Renderers are wrapped in lambdas so only the selected one is looked up and called.
    renderers = {
        "request_to_oai_cc": lambda: oai_request_event_md(event, prefix),
        "reply_func_executed": lambda: reply_func_event_md(event, prefix),
        "received_message": lambda: received_msg_md(event, prefix),
        "browser_action": lambda: browser_action_md(event, prefix),
        "viewport_state": lambda: viewport_state_md(event, prefix),
        "exception_thrown_lambda": lambda: exception_thrown_lambda_md(event, prefix),
    }

    render = renderers.get(event["event_name"])
    if render is not None:
        return render()

    return f"""###{prefix}event_name: {event["event_name"]} [*view not implemented*]"""

In [None]:
def event_to_text(event):
    """Convert one image group into the two HTML snippets shown by the UI.

    Args:
        event: a dict with ``"image_holder"`` (the event that has the image)
            and ``"inner"`` (the events that followed it), or a falsy value.

    Returns:
        A pair ``(image_event_html, following_events_html)``; for a missing
        event the pair is ``("no event", "0")``.
    """
    if not event:
        return "no event", str(0)
    if len(event.keys()) == 0:
        return "no event", str(0)

    followers = event["inner"]
    # a lone empty dict is the placeholder for "nothing followed this event"
    if len(followers) == 1 and len(followers[0]) == 0:
        followers_md = "*next event has image or new phase*"
    else:
        rendered = "\n\n".join(
            [event_to_raw_markdown(follower, position) for position, follower in enumerate(followers)]
        )
        followers_md = f"**{len(followers)}** events without image after above event:\n\n{rendered}"

    holder_md = event_to_raw_markdown(event["image_holder"])

    return markdown(holder_md), markdown(followers_md)

# ipywidgets land - actual UI

## important: update db_base_path and question_id below!!

In [None]:
from sqlite3 import OperationalError

# Base directory of the results and the question (experiment) shown when the
# notebook is first run; both can be changed later through the text widgets below.
loaded_db_name = r"C:\webarena__validation_reddit_FiveAgents\\"
loaded_question_id = 69
try:
    conn = load_db(db_base_path=loaded_db_name, question_id=loaded_question_id, run_id=0)
except OperationalError:
    # No database could be opened at that location: the UI below then starts
    # with a single empty placeholder instead of real events.
    conn = None


import base64
from IPython.display import display
import ipywidgets as widgets
from markdown import markdown

# Width shared by the status line and the three HTML panes.
DEFAULT_WIDTH = "800px"

if conn:
    merged_and_nested = get_relevant_events(conn)
    data = merged_and_nested
else:
    # Placeholder with one phase holding one missing event, so that the indexing
    # below and the first render still work without a database.
    data = [{"inner": [None], "scenariopy": None}]

# Events are stored as a list of lists, so we need to keep track of which one is being rendered:
# - the outer events represent phases in the driver/orchestrator code
# - the inner events are the events caused during that phase
# index[0] is the position of the phase, index[1] the position inside that phase's "inner" list.
# The view starts at the last inner entry of the last phase.
index = [
    len(data) - 1,
    len(data[len(data) - 1]["inner"]) - 1,
]  # Using a list to keep track of the index since integers are immutable


def update_image_content(image_widget, event):
    """Show the screenshot of ``event`` in ``image_widget``, or hide the widget.

    When the event has a non-``None`` ``"image_holder"``, its base64 ``"image"``
    is decoded into the widget and the widget is sized 800x1000. Otherwise the
    widget is emptied and collapsed to 0x0.
    """
    has_screenshot = bool(event) and "image_holder" in event and event["image_holder"] is not None

    if not has_screenshot:
        image_widget.value = b""
        image_widget.width = 0
        image_widget.height = 0
        return

    image_widget.value = base64.b64decode(event["image_holder"]["image"])
    image_widget.width = 800
    image_widget.height = 1000


def update_content(outer_event_index, inner_event_index):
    """Refresh all panes and the image for the given position in ``data``.

    Args:
        outer_event_index: position of the phase inside ``data``.
        inner_event_index: position of the image group inside that phase's
            ``"inner"`` list.
    """
    phase = data[outer_event_index]
    outer_event_textarea.value = markdown(outermost_event_md(phase["scenariopy"]), extensions=["fenced_code"])

    selected = phase["inner"][inner_event_index]
    holder_html, followers_html = event_to_text(selected)
    event_with_image_textarea.value = holder_html
    events_without_image_textarea.value = followers_html

    # load image - the widget is hidden when the selected group has none
    update_image_content(image_widget, selected)


## render UI

# Editable base directory; read by on_experiment_num_change when a new experiment is loaded.
base_dir_text = widgets.Text(
    value=loaded_db_name,
    description="Base directory",
    disabled=False,
    layout=widgets.Layout(width="80%"),
)

# Editable experiment (question) number; changes to it are observed further below.
experiment_num_text = widgets.Text(
    value=str(loaded_question_id),
    description="Experiment number",
    disabled=False,
)

# Read-only status line reporting whether the requested database could be loaded.
status_msg = widgets.Text(disabled=True, layout=widgets.Layout(width=DEFAULT_WIDTH))
if conn is None:
    status_msg.value = f"does not exist: {loaded_db_name} exp_num:{loaded_question_id}"


def on_experiment_num_change(change):
    """Load the experiment typed into the experiment-number box and show it.

    On success the module-level ``data``, ``loaded_db_name`` and
    ``loaded_question_id`` are replaced, the view jumps to the last entry of
    the last phase, and the status line reports the load. On any failure
    (non-numeric input, database cannot be read, no events) the previous data
    stays on screen and the status line says why.

    Args:
        change: the change notification passed by the widget's ``observe``.
    """
    global loaded_question_id, loaded_db_name, data

    if change["type"] != "change" or change["name"] != "value":
        return
    if change.new == "":
        return

    requested_dir = base_dir_text.value
    still_showing = f"showing previous data {loaded_db_name} exp_num:{loaded_question_id} run:0"

    # The handler can fire while the user is still typing, so the text may not be a number yet.
    try:
        requested_id = int(change.new)
    except ValueError:
        status_msg.value = f"not a valid experiment number: {change.new} {still_showing}"
        return

    try:
        new_conn = load_db(db_base_path=requested_dir, question_id=requested_id, run_id=0)
        try:
            new_data = get_relevant_events(new_conn)
        finally:
            # everything needed has been copied into plain lists/dicts
            new_conn.close()
    except OperationalError:
        status_msg.value = f"does not exist: {requested_dir} exp_num:{change.new} run:0 {still_showing}"
        return

    if not new_data:
        status_msg.value = f"no events found: {requested_dir} exp_num:{change.new} run:0 {still_showing}"
        return

    # Only commit the new state once the data has really been loaded.
    loaded_question_id = requested_id
    loaded_db_name = requested_dir
    data = new_data
    status_msg.value = f"loaded {requested_dir} exp_num:{change.new} run:0 successfully"
    index[0] = len(data) - 1
    index[1] = len(data[index[0]]["inner"]) - 1
    update_content(outer_event_index=index[0], inner_event_index=index[1])


# Reload the data whenever the experiment number text changes.
experiment_num_text.observe(on_experiment_num_change, names="value", type="change")

# Row holding the two input fields; it is placed at the top of the container built below.
input_box = widgets.HBox([base_dir_text, experiment_num_text], layout=widgets.Layout(width="800px"))
# NOTE(review): this second HBox is created but its result is not assigned or used here.
widgets.HBox([base_dir_text, experiment_num_text])

# Pane 1: the event that opened the currently selected phase.
outer_event_textarea = widgets.HTML(
    value=markdown(outermost_event_md(data[index[0]]["scenariopy"]), extensions=["fenced_code"]),
    disabled=False,
    layout={
        "width": DEFAULT_WIDTH,
        "height": "110px",
        "overflow": "auto",
        "border": "1px solid black",
        "padding": "5px",
    },
)

# Initial selection (see `index`), rendered once up front to fill panes 2 and 3.
current_event = data[index[0]]["inner"][index[1]]
image_event_repr, inner_events_repr = event_to_text(current_event)

# Pane 2: the event that holds the image.
event_with_image_textarea = widgets.HTML(
    value=image_event_repr,
    disabled=False,
    layout={
        "width": DEFAULT_WIDTH,
        "height": "300px",
        "overflow": "auto",
        "border": "1px solid black",
        "padding": "5px",
    },
)

# Pane 3: the events without image that followed it.
events_without_image_textarea = widgets.HTML(
    value=inner_events_repr,
    disabled=False,
    layout={
        "width": DEFAULT_WIDTH,
        "height": "200px",
        "overflow": "auto",
        "border": "1px solid black",
        "padding": "5px",
    },
)

# Starts collapsed (0x0); update_image_content resizes it when there is an image to show.
image_widget = widgets.Image(
    format="png",
    width=0,
    height=0,
)
update_image_content(image_widget, current_event)
# Navigation: step through image groups, or jump between phases.
prev_button = widgets.Button(description="Previous")
next_button = widgets.Button(description="Next")
prev_phase_button = widgets.Button(description="Previous Phase")
next_phase_button = widgets.Button(description="Next Phase")


def on_prev_button_clicked(b):
    """Step back one image group, moving to the end of the previous phase when needed.

    Does nothing when already at the very first group.
    """
    phase_pos, group_pos = index
    if group_pos > 0:
        index[1] = group_pos - 1
    elif phase_pos > 0:
        index[0] = phase_pos - 1
        index[1] = len(data[index[0]]["inner"]) - 1
    else:
        return
    update_content(outer_event_index=index[0], inner_event_index=index[1])


def on_next_button_clicked(b):
    """Step forward one image group, moving to the start of the next phase when needed.

    Does nothing when already at the very last group.
    """
    phase_pos, group_pos = index
    last_group_pos = len(data[phase_pos]["inner"]) - 1
    if group_pos < last_group_pos:
        index[1] = group_pos + 1
    elif phase_pos < len(data) - 1:
        index[0] = phase_pos + 1
        index[1] = 0
    else:
        return
    update_content(outer_event_index=index[0], inner_event_index=index[1])


def on_prev_phase_button_clicked(b):
    """Jump to the first image group of the previous phase, if there is one."""
    if index[0] <= 0:
        return
    index[0] = index[0] - 1
    index[1] = 0
    update_content(outer_event_index=index[0], inner_event_index=index[1])


def on_next_phase_button_clicked(b):
    """Jump to the first image group of the next phase, if there is one."""
    if index[0] >= len(data) - 1:
        return
    index[0] = index[0] + 1
    index[1] = 0
    update_content(outer_event_index=index[0], inner_event_index=index[1])


# Hook each navigation button up to its handler.
prev_button.on_click(on_prev_button_clicked)
next_button.on_click(on_next_button_clicked)
prev_phase_button.on_click(on_prev_phase_button_clicked)
next_phase_button.on_click(on_next_phase_button_clicked)

# Two-column grid of buttons: the "previous" buttons on the left, the "next" buttons on the right.
button_grid = widgets.GridBox(
    children=[prev_phase_button, prev_button, next_phase_button, next_button],
    layout=widgets.Layout(grid_template_rows="auto auto", grid_template_columns="49% 49%", grid_gap="5px 5px"),
)

# Stack everything vertically: inputs, status, navigation, the three panes, then the screenshot.
container_layout = widgets.Layout(width="820px")
container = widgets.VBox(
    [
        input_box,
        status_msg,
        button_grid,
        outer_event_textarea,
        event_with_image_textarea,
        events_without_image_textarea,
        image_widget,
    ],
    layout=container_layout,
)

display(container)

## EXTRA: Diff between states - in this case cookie states

We can also figure out how 'json_state' changes between consecutive events of the same type. In this section we debug how and when the cookie state changes.

In [None]:
from jsondiff import diff

# Load every event without noise filtering, so nothing is dropped before we look for "cookies" events.
# NOTE: `conn` is None when the initial load_db call failed; this cell needs a valid connection.
final = get_all_events(conn, filter=False)

cookies_events = [event for event in final if event["event_name"] == "cookies"]
# print when cookie changes: compare each cookies event with the one right before it
for i in range(1, len(cookies_events)):
    # presumably an empty (falsy) diff means the two states are identical - see the jsondiff docs
    if curr_diff := diff(cookies_events[i - 1]["json_state"], cookies_events[i]["json_state"]):
        print("event number:", cookies_events[i]["id"])
        print(cookies_events[i]["timestamp"])
        print(curr_diff)
        print()

In [None]:
# Position (in the unfiltered `final` list) of the event to investigate.
point_of_interest = 270
# The 35 events leading up to it; the event at `point_of_interest` itself is not included.
context = final[point_of_interest - 35 : point_of_interest]


def filter_events(event):
    """Tell whether ``event`` should stay in the context listing.

    Returns ``False`` when the event has a ``json_state`` whose ``url``
    contains ``"js"``, ``"svg"``, ``"css"`` or ``"woff2"`` anywhere in it,
    and ``True`` otherwise.
    """
    if "json_state" not in event:
        return True

    for marker in ("js", "svg", "css", "woff2"):
        if marker in event["json_state"].get("url", ""):
            return False
    return True


# drop from context every event whose json_state url contains 'js', 'svg', 'css' or 'woff2'
context = [c for c in context if filter_events(c)]

# Dump every remaining event. Nothing here is compared with a predecessor, so the
# listing starts at the first event (it used to start at the second one).
for context_event in context:
    print("*****", context_event["id"], context_event["source_name"], context_event["event_name"])
    print(context_event)
    if context_event["event_name"] == "Request":
        print(context_event["json_state"]["request_content"])
        print(context_event["json_state"])
    elif "json_state" in context_event:
        print(context_event["json_state"])
    else:
        # rows from the chat_completions table have no json_state
        print(context_event)
    print()