In [1]:
import json
import os

In [2]:
# Root directory of the dataset being processed. The commented-out lines are
# alternative roots that can be swapped in.
# src_path = "/home/jiayipan/data/ios_traj/output_cogagent"
# src_path = "/home/jiayipan/data/GUI_Proj/output_autoui_large"
src_path = "/home/jiayipan/data/GUI_Proj/unified_datasets/android-gt"
# NOTE(review): `full_data` is not used by any cell visible here; it is kept
# in case another cell relies on it.
full_data = []
# Read the trajectories inside a context manager so the file handle is closed
# as soon as parsing finishes (a bare `json.load(open(...))` leaks the handle).
with open(os.path.join(src_path, "aitw_all.json"), "r") as raw_file:
    raw_data = json.load(raw_file)

In [3]:
"""
Copied from android_eval
"""
from dataclasses import dataclass
from enum import Enum
from typing import Optional, Tuple
class ActionType(Enum):
    """Enumerates the kinds of action an `AndroidAction` can carry.

    Each member maps to a distinct integer code from 0 through 7.
    """

    Idle = 0
    DualPoint = 1
    Type = 2
    GoBack = 3
    GoHome = 4
    Enter = 5
    TaskComplete = 6
    TaskImpossible = 7

@dataclass
class AndroidAction:
    """One action: an action type plus optional arguments.

    Only `action_type` is required; the remaining fields default to None and
    are included in the string form only when they have been set.
    """

    action_type: ActionType
    # Pair of floats, rendered with four decimals by __str__.
    # NOTE(review): presumably screen coordinates -- confirm order and units.
    touch_point: Optional[Tuple[float, float]] = None
    # Second pair of floats, rendered the same way as touch_point.
    lift_point: Optional[Tuple[float, float]] = None
    # Text payload, rendered in single quotes by __str__.
    typed_text: Optional[str] = None

    def __str__(self):
        """Return a comma-separated, human-readable description of the action.

        The action type name always comes first; every optional field that is
        not None follows in the order touch point, lift point, typed text.
        """
        components = [f"Action Type: {self.action_type.name}"]

        # Optional fields are compared against None explicitly rather than by
        # truthiness, so a value that was set but happens to be falsy (notably
        # an empty typed_text) is still reported instead of being dropped.
        if self.touch_point is not None:
            touch_point_str = f"({self.touch_point[0]:.4f}, {self.touch_point[1]:.4f})"
            components.append(f"Touch Point: {touch_point_str}")

        if self.lift_point is not None:
            lift_point_str = f"({self.lift_point[0]:.4f}, {self.lift_point[1]:.4f})"
            components.append(f"Lift Point: {lift_point_str}")

        if self.typed_text is not None:
            components.append(f"Typed Text: '{self.typed_text}'")

        return ", ".join(components)

    def to_act(self):
        """Placeholder: currently does nothing and returns None."""
        pass

### Prepare Eval Results

In [6]:
# Peek at the `aitw_action` entry stored on the last step of the first trajectory.
acts = raw_data[0]["steps"][-1]["other"]["aitw_action"]

In [9]:
def is_traj_success(traj):
    """Return whether a trajectory ended with a task-complete status.

    Args:
        traj: A trajectory dict with a 'steps' list. Each step holds its raw
            action under step['other']['aitw_action'], whose first element is
            the action type name.

    Returns:
        True if the first element of the final step's raw action equals
        "STATUS_TASK_COMPLETE"; False otherwise, including when the
        trajectory has no steps at all.
    """
    steps = traj['steps']
    # A trajectory with no steps has no final action to inspect; treat it as
    # unsuccessful instead of raising IndexError on steps[-1].
    if not steps:
        return False
    last_act = steps[-1]['other']['aitw_action']
    return last_act[0] == "STATUS_TASK_COMPLETE"

True

In [12]:
# Build one ground-truth evaluation record per trajectory. The dataset name is
# the last component of `src_path`; it is computed once because it does not
# vary between trajectories.
# NOTE: the variable name keeps its original spelling ("formated") so any
# other cell referring to it keeps working.
dataset_name = src_path.split("/")[-1]
formated_eval_results = [
    {
        "dataset_path": dataset_name,
        "task_idx": traj['uid'],
        "task_uid": traj['uid'],
        "user_uid": "AitW",
        "annotation": is_traj_success(traj),
        "comment": "",
    }
    for traj in raw_data
]

# Make sure the output directory exists before opening the file; otherwise
# open() raises FileNotFoundError on a dataset that has no "evals" folder yet.
evals_dir = os.path.join(src_path, "evals")
os.makedirs(evals_dir, exist_ok=True)

# Write the records as JSON Lines: one JSON object per line.
with open(os.path.join(evals_dir, "gt.jsonl"), 'w') as file:
    for item in formated_eval_results:
        file.write(json.dumps(item) + '\n')

### Fix Trajectory Log

In [24]:
def get_translated_action(raw_action):
    """Convert a raw `(type, argument)` action pair into an `AndroidAction`.

    Args:
        raw_action: A two-element sequence: the action type name followed by
            its argument.

    Returns:
        The `AndroidAction` corresponding to the raw action type.

    Raises:
        ValueError: If the action type name is not one of the known names.
    """
    act_type, act_arg = raw_action

    if act_type == "DUAL_POINT":
        # Each pair of entries is swapped when building the points.
        # NOTE(review): presumably converts (y, x) ordering to (x, y) -- confirm.
        touch = (act_arg[1], act_arg[0])
        lift = (act_arg[3], act_arg[2])
        return AndroidAction(ActionType.DualPoint, touch, lift)

    if act_type == "TYPE":
        return AndroidAction(ActionType.Type, typed_text=act_arg)

    # The remaining raw types carry no argument and map straight to an action type.
    argless_actions = (
        ("PRESS_BACK", ActionType.GoBack),
        ("PRESS_HOME", ActionType.GoHome),
        ("PRESS_ENTER", ActionType.Enter),
        ("STATUS_TASK_COMPLETE", ActionType.TaskComplete),
        ("STATUS_TASK_IMPOSSIBLE", ActionType.TaskImpossible),
    )
    for raw_name, action_type in argless_actions:
        if act_type == raw_name:
            return AndroidAction(action_type)

    raise ValueError(f"Unknown action type: {act_type}")

In [29]:
# Attach a human-readable 'action' string to every step, derived from its raw
# 'aitw_action'. Each `step` is the same dict object stored inside `raw_data`,
# so writing through it updates `raw_data` in place.
for traj in raw_data:
    for step in traj['steps']:
        step_info = step['other']
        step_info['action'] = str(get_translated_action(step_info['aitw_action']))

In [31]:
# Persist the trajectories (now carrying the translated 'action' strings) as
# indented JSON next to the source data.
trajectory_log_path = os.path.join(src_path, "trajectory_log.json")
with open(trajectory_log_path, 'w') as log_file:
    json.dump(raw_data, log_file, indent=2)