<a href="https://colab.research.google.com/github/SDET-Hamad-KMughal/ARES-Framework/blob/main/Phase_3_Logic/code/ARES_Action_Mapping.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# 1. Install YOLOv8
!pip install ultralytics

# 2. Imports
import os
from ultralytics import YOLO
from google.colab import drive
import torch

# 3. Mount Drive
drive.mount('/content/drive')

Collecting ultralytics
  Downloading ultralytics-8.4.6-py3-none-any.whl.metadata (38 kB)
Collecting ultralytics-thop>=2.0.18 (from ultralytics)
  Downloading ultralytics_thop-2.0.18-py3-none-any.whl.metadata (14 kB)
Downloading ultralytics-8.4.6-py3-none-any.whl (1.2 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m1.2/1.2 MB[0m [31m14.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.18-py3-none-any.whl (28 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.4.6 ultralytics-thop-2.0.18
Creating new Ultralytics Settings v0.0.6 file ✅
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Mounted at /content/drive

In [2]:
# Load the verified Phase 1 detector weights (best.pt) from Google Drive.
# Requires the Drive mount from the setup cell; later cells use `model_perception`.
model_path = '/content/drive/MyDrive/ARES_Final_Study/Perception_Engine_v22/weights/best.pt'
model_perception = YOLO(model_path)

print("‚úÖ Phase 1 Weights Loaded into Action Mapper.")

‚úÖ Phase 1 Weights Loaded into Action Mapper.


In [4]:
def get_ares_action_space(image_path, model=None, min_confidence=0.0):
    """Detect UI elements in a screenshot and map each one to a click target.

    Args:
        image_path: Image source passed unchanged to the detector call.
        model: Callable detector whose call returns a sequence of results with a
            ``boxes`` attribute, and which exposes a ``names`` mapping from class
            index to label. Defaults to the notebook-level ``model_perception``.
        min_confidence: Detections whose confidence is below this value are
            skipped. The default of 0.0 does not filter non-negative scores.

    Returns:
        A list with one dict per kept detection, in the order the detector
        reports them::

            {"element": <label>, "coords": (center_x, center_y), "confidence": <score>}

        ``coords`` is the midpoint of the box's ``xyxy`` corners. The list is
        empty when the detector returns no results or no boxes. Only the first
        entry of the detector's result sequence is read.
    """
    if model is None:
        # Fall back to the detector loaded in an earlier notebook cell.
        model = model_perception

    # Use YOLOv8 to 'see' the current state
    results = model(image_path)
    if not results:
        return []

    boxes = results[0].boxes
    if boxes is None:
        # Nothing to iterate over; treat as "no interactable elements".
        return []

    actions = []
    for box in boxes:
        confidence = float(box.conf[0])
        if confidence < min_confidence:
            continue

        # Corner coordinates of the detection.
        x1, y1, x2, y2 = box.xyxy[0]

        # Center point of the box is the 'click' target.
        center_x = (x1 + x2) / 2
        center_y = (y1 + y2) / 2

        actions.append({
            "element": model.names[int(box.cls[0])],
            "coords": (float(center_x), float(center_y)),
            "confidence": confidence,
        })

    return actions

In [5]:
# Smoke test: run the action mapper on one Airbnb iOS screenshot stored on Drive.
img_path = '/content/drive/MyDrive/ARES_test_images/Airbnb iOS 2.png'
test_actions = get_ares_action_space(img_path)

# Print the detection count, then each element's label and click point (numbered from 1).
print(f"üéØ Found {len(test_actions)} interactable elements.")
for i, action in enumerate(test_actions):
    print(f"Element {i+1}: {action['element']} at {action['coords']}")


image 1/1 /content/drive/MyDrive/ARES_test_images/Airbnb iOS 2.png: 640x288 1 ArrowButton, 1 Checkmark, 1 DropDown, 2 Icons, 1 Input, 5 TextButtons, 375.1ms
Speed: 15.1ms preprocess, 375.1ms inference, 50.5ms postprocess per image at shape (1, 3, 640, 288)
üéØ Found 11 interactable elements.
Element 1: TextButton at (560.3558349609375, 1068.017822265625)
Element 2: Checkmark at (147.0794677734375, 1495.900634765625)
Element 3: TextButton at (566.4544677734375, 1879.9241943359375)
Element 4: TextButton at (566.7907104492188, 2072.89306640625)
Element 5: TextButton at (568.6433715820312, 1687.60791015625)
Element 6: Icon at (145.38052368164062, 1687.755126953125)
Element 7: Icon at (870.7791748046875, 2493.90673828125)
Element 8: DropDown at (561.3467407226562, 501.5756530761719)
Element 9: TextButton at (569.48046875, 1501.490478515625)
Element 10: Input at (563.8064575195312, 686.64111328125)
Element 11: ArrowButton at (97.18080139160156, 262.9546813964844)
