In [20]:
import glob
import json
import os
import zipfile

from pathlib import Path
import shutil
from operator import itemgetter

import cv2
import pandas as pd
import torch
import yaml
from sklearn.model_selection import train_test_split
from ultralytics import YOLO
from openai import OpenAI

In [14]:
# Load variables from a local .env file into the process environment
# (presumably the source of OPENAI_API_KEY, which is read later via os.getenv — confirm).
from dotenv import load_dotenv
load_dotenv()

True

# Data preparation

In [8]:
# Working directories of the pipeline, all relative to the notebook folder.
images = Path('../data/images/')
json_trees = Path('../data/json_trees/')
clean_trees = Path('../data/clean_trees/')
representations = Path('../data/representations/')

# Create every directory up front; directories that already exist are left untouched.
for data_dir in (images, json_trees, clean_trees, representations):
    data_dir.mkdir(parents=True, exist_ok=True)

Data hierarchy:

data
├───clean_trees
├───images
├───json_trees
├───representations
└───yolo_data
    ├───test
    │   ├───images
    │   └───labels
    ├───train
    │   ├───images
    │   └───labels
    └───val
        ├───images
        └───labels

In [27]:
# Root folder of the raw data; each `screen_path` value is resolved relative to it when files are copied.
iasa_data_dir = Path('../data/IASA_Champ_Final/app_data')

In [28]:
# The split table sits next to the app_data folder and lists every screen with its split.
split_csv_path = iasa_data_dir.parent / 'train_test_split.csv'
df = pd.read_csv(split_csv_path)
df.head()

Unnamed: 0,screen_path,split
0,24hourwallpaper/1707228345/,train
1,24hourwallpaper/1707228419/,train
2,24hourwallpaper/1707228389/,train
3,Almighty/1707228267/,train
4,Almighty/1707228277/,train


In [13]:
# Substring fixes applied to `screen_path` (presumably so the values match the folder names on disk — confirm).
replacements = {
    'Elelphas': 'Elephas',
    'Marginnote': 'MarginNote',
    ' Express': '',
    'studio': 'Studio',
}
screen_path_fixed = df['screen_path'].replace(replacements, regex=True)
df['screen_path'] = screen_path_fixed
df.head()

Unnamed: 0,screen_path,split
0,24hourwallpaper/1707228345/,train
1,24hourwallpaper/1707228419/,train
2,24hourwallpaper/1707228389/,train
3,Almighty/1707228267/,train
4,Almighty/1707228277/,train


## Restore images and json trees

In [18]:
# Flatten the raw data: the screen folder "<app>/<timestamp>/" becomes
# "<app>_<timestamp>.png" (plus "<app>_<timestamp>.json" when a tree exists).
for screen_path in df['screen_path']:
    screen_dir = os.path.join(iasa_data_dir, screen_path)
    # rstrip instead of [:-1]: a path without the trailing slash must not lose its last character.
    flat_name = screen_path.rstrip('/').replace('/', '_')

    # sorted() keeps the choice deterministic should a folder ever hold several files.
    png_files = sorted(glob.glob(os.path.join(screen_dir, '*.png')))
    if not png_files:
        # Fail with the offending folder instead of a bare IndexError.
        raise FileNotFoundError(f'No PNG screenshot found in {screen_dir}')
    shutil.copy(png_files[0], Path('../data/images') / (flat_name + '.png'))

    # The JSON tree is optional: screens without one are used as test data later on.
    json_files = sorted(glob.glob(os.path.join(screen_dir, '*.json')))
    if json_files:
        shutil.copy(json_files[0], Path('../data/json_trees') / (flat_name + '.json'))

## Prepare data to train YOLO

In [51]:
# Dataset root with the train/val/test x images/labels layout.
yolo_path = Path('../data/yolo_data')
for split_name in ('train', 'val', 'test'):
    for content in ('images', 'labels'):
        (yolo_path / split_name / content).mkdir(parents=True, exist_ok=True)

In [22]:
def get_yolo_format(json_tree, w, h):
    """Flatten a tree of UI nodes into normalised centre-format box rows.

    Args:
        json_tree: list of sibling nodes; each node is a dict with 'role',
            'visible_bbox' (``[x, y, width, height]`` or None) and optionally
            'children' (another list of nodes).
        w: image width used to normalise x and width.
        h: image height used to normalise y and height.

    Returns:
        A list of ``[role, x_center / w, y_center / h, width / w, height / h]``
        rows. A node whose 'visible_bbox' is None is skipped, and its children
        are not visited.
    """
    yolo_rows = []
    pending = [json_tree]  # every entry is a list of sibling nodes still to visit

    while pending:
        for node in pending.pop():
            box = node['visible_bbox']
            if box is None:
                # Invisible node: dropped together with its whole subtree.
                continue

            left, top, box_w, box_h = box[0], box[1], box[2], box[3]
            yolo_rows.append([
                node['role'],
                (left + box_w/2) / w,
                (top + box_h/2) / h,
                box_w / w,
                box_h / h,
            ])

            # Queue the children so they are processed after the current siblings.
            if 'children' in node:
                pending.append(node['children'])

    return yolo_rows

In [29]:
all_elements = []     # every role occurrence, used for the class statistics
img_to_elements = {}  # image file name -> list of [role, xc, yc, w, h] rows

# The file list gets its own name on purpose: re-binding `json_trees` here would
# replace the directory Path that later cells still rely on.
json_tree_files = glob.glob('../data/json_trees/*.json')

for json_tree_file in json_tree_files:
    # The screenshot shares the tree's base name and lives in the sibling images folder.
    img_name = json_tree_file.replace('.json', '.png').replace('json_trees', 'images')
    with open(json_tree_file) as f:
        json_tree = json.load(f)

    img = cv2.imread(img_name)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise FileNotFoundError(f'Could not read image {img_name}')
    height, width = img.shape[:2]

    # The loaded tree is a single root node, so wrap it in a list of siblings.
    elements = get_yolo_format([json_tree], width, height)
    all_elements.extend(map(itemgetter(0), elements))
    img_to_elements[os.path.basename(img_name)] = elements

In [30]:
# Number of occurrences of each role across all collected elements, most frequent first.
elements_count = pd.Series(all_elements).value_counts()
elements_count

AXStaticText             5246
AXGroup                  4872
AXButton                 4018
AXImage                  1859
AXRow                    1190
AXCell                   1106
AXCheckBox                500
AXScrollArea              345
AXTextField               324
AXRadioButton             271
AXLink                    239
AXWindow                  220
AXPopUpButton             218
AXHeading                 195
AXToolbar                 187
AXList                    184
AXValueIndicator          181
AXColumn                  174
AXGenericElement          170
AXScrollBar               147
AXMenuButton              144
AXSplitGroup              112
AXSlider                  103
AXSplitter                 96
AXTable                    82
AXMenuItem                 80
AXOutline                  77
AXUnknown                  64
AXRadioGroup               64
AXWebArea                  49
AXTextArea                 43
AXTabGroup                 37
AXDisclosureTriangle       32
AXIncremen

In [31]:
# Only roles seen more than `thd` times become detection classes.
thd = 100
frequent_roles = elements_count[elements_count > thd].index
# Class ids follow the frequency order of `elements_count`.
class_to_index = dict(zip(frequent_roles, range(len(frequent_roles))))

In [40]:
# Split the labelled images 90/10 into train and validation; the fixed seed keeps the split reproducible.
all_images = list(img_to_elements.keys())
train_imgs, val_imgs = train_test_split(all_images, test_size=0.1, random_state=42)

In [45]:
for img in train_imgs:
    # Keep only boxes whose role is a known class and swap the role for its class index.
    rows = [
        [class_to_index[role], *coords]
        for role, *coords in img_to_elements[img]
        if role in class_to_index
    ]
    labels = '\n'.join(' '.join(str(value) for value in row) for row in rows)

    # `img` already ends with ".png". Appending the extension again produced "name.png.png",
    # which no longer shares a base name with the "name.txt" label written below.
    shutil.copy(Path('../data/images') / img, yolo_path / 'train' / 'images' / img)

    label_name = Path(img).with_suffix('.txt').name
    with open(yolo_path / 'train' / 'labels' / label_name, 'w') as f:
        f.write(labels)

In [46]:
for img in val_imgs:
    # Keep only boxes whose role is a known class and swap the role for its class index.
    rows = [
        [class_to_index[role], *coords]
        for role, *coords in img_to_elements[img]
        if role in class_to_index
    ]
    labels = '\n'.join(' '.join(str(value) for value in row) for row in rows)

    # `img` already ends with ".png". Appending the extension again produced "name.png.png",
    # which no longer shares a base name with the "name.txt" label written below.
    shutil.copy(Path('../data/images') / img, yolo_path / 'val' / 'images' / img)

    label_name = Path(img).with_suffix('.txt').name
    with open(yolo_path / 'val' / 'labels' / label_name, 'w') as f:
        f.write(labels)

## Restore test data for yolo

In [49]:
# File names of every restored screenshot.
all_images = {os.path.basename(png_path) for png_path in glob.glob('../data/images/*.png')}

# Screenshots without a collected label entry form the test set.
test_images = all_images.difference(img_to_elements.keys())

In [76]:
# Copy every test screenshot into the YOLO test/images folder under its original name.
test_images_dir = yolo_path / 'test' / 'images'
source_images_dir = Path('../data/images')
for img in test_images:
    shutil.copy(source_images_dir / img, test_images_dir / img)

## Make config file to train yolo

In [63]:
# Dataset description for training; the split folders are given relative to 'path'.
config = {
    'path': str(yolo_path.absolute()),
    # Forward slashes work on every OS; 'train\images' was Windows-only and relied on
    # the invalid escape sequence '\i' being passed through unchanged.
    'train': 'train/images',
    'val': 'val/images',
    'test': 'test/images',
    # Derived from the class mapping so it cannot drift when the frequency threshold changes.
    'nc': len(class_to_index),
    'names': list(class_to_index.keys())
}

In [64]:
# Persist the dataset description next to the split folders; training reads it from this path.
with open(yolo_path / 'config.yaml', 'w') as f:
    yaml.dump(config, f)

# YOLO
## Training YOLOv8s

In [65]:
# Build a YOLOv8s model from its architecture definition
# (presumably untrained, as opposed to loading a .pt checkpoint — confirm).
model = YOLO('yolov8s.yaml')

In [None]:
# Train on the dataset described by config.yaml (long-running cell).
model.train(data=yolo_path/'config.yaml', epochs=1200)

## Inference YOLO

In [81]:
# Load trained weights (presumably the best checkpoint saved by the training run above — confirm the
# run folder name) and predict on every image in the test folder.
model = YOLO(r"runs/detect/train/weights/best.pt")
input_folder = yolo_path / 'test' / 'images'
res = model.predict(input_folder)

image 1/114 D:\Programming\Programs\Python\DS\IASA DS Champ 2024\Main task\notebooks\..\data\yolo_data\test\images\24hourwallpaper_1707228345.png: 640x544 15 AXStaticTexts, 12 AXButtons, 1 AXImage, 1 AXScrollArea, 1 AXWindow, 2 AXPopUpButtons, 1 AXList, 1 AXSplitGroup, 7.0ms
image 2/114 D:\Programming\Programs\Python\DS\IASA DS Champ 2024\Main task\notebooks\..\data\yolo_data\test\images\24hourwallpaper_1707228389.png: 384x640 5 AXStaticTexts, 8 AXGroups, 2 AXButtons, 1 AXImage, 1 AXLink, 1 AXWindow, 6.0ms
image 3/114 D:\Programming\Programs\Python\DS\IASA DS Champ 2024\Main task\notebooks\..\data\yolo_data\test\images\24hourwallpaper_1707228419.png: 384x640 2 AXStaticTexts, 4 AXGroups, 2 AXButtons, 1 AXWindow, 1 AXPopUpButton, 6.0ms
image 4/114 D:\Programming\Programs\Python\DS\IASA DS Champ 2024\Main task\notebooks\..\data\yolo_data\test\images\Bike_1707240151.png: 640x640 11 AXStaticTexts, 5 AXButtons, 2 AXScrollAreas, 1 AXWindow, 7.0ms
image 5/114 D:\Programming\Programs\Python\DS\

In [82]:
# Write one "<role> <confidence> <x> <y> <width> <height>" line per detection,
# with (x, y) the top-left corner in pixels.
for result in res:
    # Derive ".../labels/<name>.txt" from ".../images/<name>.png" through path components,
    # so an "images" substring elsewhere in the path or file name is never rewritten.
    image_path = Path(result.path)
    output_file = image_path.parent.with_name('labels') / (image_path.stem + '.txt')

    corner_boxes = result.boxes.xyxy.tolist()
    confidences = result.boxes.conf.tolist()
    class_ids = result.boxes.cls.tolist()

    with open(output_file, 'w') as f:
        for (left, top, right, bottom), conf, cls in zip(corner_boxes, confidences, class_ids):
            # `boxes.xywh` is centre-based, but the tree-building code treats x, y as the
            # top-left corner, so the box is derived from the corner format instead.
            x, y = int(left), int(top)
            width, height = int(right - left), int(bottom - top)
            f.write(f'{result.names[int(cls)]} {conf:.4f} {x} {y} {width} {height}\n')

# Building hierarchy

In [97]:
def bb_IOU(bb1, bb2):
    """Intersection over union of two boxes given as ``(x, y, width, height)``.

    Returns 0 when the boxes do not overlap (touching edges count as no
    overlap), otherwise the intersection area divided by the union area.
    """
    left1, top1, width1, height1 = bb1
    left2, top2, width2, height2 = bb2

    # Extent of the overlap along each axis; non-positive means the boxes are disjoint.
    overlap_w = min(left1 + width1, left2 + width2) - max(left1, left2)
    overlap_h = min(top1 + height1, top2 + height2) - max(top1, top2)
    if overlap_w <= 0 or overlap_h <= 0:
        return 0

    intersection = overlap_w * overlap_h
    union = width1 * height1 + width2 * height2 - intersection
    return intersection / union




def create_nodes(fields):
    """Build a tree node from the whitespace-split fields of one label line.

    Args:
        fields: ``[role, x, y, width, height]`` or
            ``[role, confidence, x, y, width, height]``. The box is always the
            LAST four fields, so an optional confidence column is ignored
            instead of being mistaken for the x coordinate.

    Returns:
        A dict with the keys name, role, description, role_description, value,
        children, bbox and visible_bbox. 'name' equals 'role'; 'bbox' and
        'visible_bbox' are separate lists holding the same integer values.

    Raises:
        ValueError: if fewer than five fields are given or a box value is not numeric.
    """
    if len(fields) < 5:
        raise ValueError(f'Expected a role followed by at least 4 box values, got: {fields!r}')

    role = fields[0]
    # int(float(...)) accepts both "12" and "12.0" and truncates towards zero.
    bbox = [int(float(value)) for value in fields[-4:]]

    return {
        "name": role,
        "role": role,
        "description": None,
        "role_description": None,
        "value": None,
        "children": [],
        "bbox": bbox,
        "visible_bbox": bbox.copy(),
    }


def create_json_file(txt_file_path, output_dir):
    """Turn one label file into a JSON tree rooted at an AXWindow node.

    Every non-blank line of ``txt_file_path`` becomes a node via ``create_nodes``.
    The first AXWindow line becomes the root; if there is none, a zero-sized
    AXWindow is used. Further AXWindow lines are kept as ordinary detections
    rather than being dropped. A detection whose IoU with an earlier,
    still top-level detection exceeds 0.5 is nested under that detection; the
    rest become direct children of the root.

    Args:
        txt_file_path: path of the label file to read.
        output_dir: directory that receives ``<stem>.json``; created if missing.
    """
    txt_file_path = Path(txt_file_path)
    output_dir = Path(output_dir)
    output_file_path = output_dir / (txt_file_path.stem + ".json")
    output_dir.mkdir(parents=True, exist_ok=True)

    root = None
    detections = []
    with txt_file_path.open('r') as txt_file:
        for line in txt_file:
            fields = line.strip().split()
            if not fields:
                continue  # tolerate blank lines
            element = create_nodes(fields)
            if fields[0] == 'AXWindow' and root is None:
                root = element
            else:
                detections.append(element)

    if root is None:
        root = create_nodes(['AXWindow', '0', '0', '0', '0'])

    # Each pair is compared exactly once and an element that has already been nested never
    # becomes a parent. IoU is symmetric, so comparing both (a, b) and (b, a) would nest the two
    # elements inside each other and make the tree cyclic, which json.dump rejects.
    nested = set()
    for i, parent in enumerate(detections):
        if i in nested:
            continue
        for j in range(i + 1, len(detections)):
            if j in nested:
                continue
            if bb_IOU(parent['bbox'], detections[j]['bbox']) > 0.5:
                parent['children'].append(detections[j])
                nested.add(j)

    root['children'].extend(
        element for index, element in enumerate(detections) if index not in nested
    )

    with output_file_path.open('w') as json_file:
        json.dump(root, json_file, indent=4)


# Build a JSON tree for every label file in the test split; the trees are written to `json_trees`.
test_labels_dir = yolo_path / 'test' / 'labels'
for txt_file_path in test_labels_dir.glob("*.txt"):
    create_json_file(txt_file_path, json_trees)

# Clean trees

In [96]:
def fixing_elements(node):
    """Remove invalid descendants from ``node`` in place and strip 'bbox' keys.

    A child is dropped together with its whole subtree when its 'visible_bbox'
    is None or missing, or when its 'bbox' contains a negative value. A child
    without a 'bbox' is kept (NOTE(review): confirm this is the desired
    handling). Surviving children are cleaned recursively. The node passed in
    is never dropped itself, but its own 'bbox' key is removed.

    Args:
        node: tree node (dict); modified in place.

    Returns:
        The same ``node`` object.
    """
    def _is_valid(child):
        if child.get('visible_bbox') is None:
            return False
        bbox = child.get('bbox')
        return bbox is None or all(value >= 0 for value in bbox)

    if 'children' in node:
        # Build a new list instead of popping while enumerating: removing element i shifts
        # the following one into slot i, so the old loop skipped the element after each removal.
        node['children'] = [
            fixing_elements(child) for child in (node['children'] or []) if _is_valid(child)
        ]

    node.pop('bbox', None)
    return node


# Clean every tree in `json_trees` and store the result under the same file name in clean_trees.
tree_pattern = str(json_trees / '*.json')
for json_path in glob.glob(tree_pattern):
    with open(json_path, 'r') as file:
        data = json.load(file)

    fix_data = fixing_elements(data)

    # The target path differs from the source only in the folder name.
    new_path = json_path.replace('json_trees', 'clean_trees')
    with open(new_path, 'w') as file_write:
        file_write.write(json.dumps(fix_data))

# Build our image representation
## The glob pattern of the cleaned JSON trees to process and the directory in which to store the output text files

In [9]:
# NOTE: `json_files_path` is a glob pattern ("<clean_trees>/*.json"), not a directory.
json_files_path = clean_trees / '*.json'
# Directory that receives the generated "*_details.txt" and "*_structure.txt" files.
txt_output_path = representations

## A function for building a tree from a list of paths and a function for printing a tree

In [4]:
def write_hierarchy_as_tree(structure_info, output_path):
    """Write slash-separated paths to ``output_path`` as a drawn tree.

    Args:
        structure_info: iterable of strings such as ``"root/child/leaf"``.
            Paths sharing a prefix are merged; first-seen order is kept.
        output_path: UTF-8 text file to (over)write.

    Each top-level name is written as ``name/``; descendants follow, prefixed
    with box-drawing connectors and indented four columns per level.
    """
    # Nested dicts: every key is one path component, every value the sub-tree below it.
    hierarchy = {}
    for path in structure_info:
        level = hierarchy
        for part in path.split('/'):
            level = level.setdefault(part, {})

    def render(level, indent):
        """Return the drawn lines for all entries of ``level``."""
        drawn = []
        last_position = len(level) - 1
        for position, (name, subtree) in enumerate(level.items()):
            is_last = position == last_position
            connector = "└──" if is_last else "├──"
            drawn.append(f"{indent}{connector} {name}/")
            if subtree:
                # Below the last entry nothing continues, so pad with spaces instead of a bar.
                drawn.extend(render(subtree, indent + ("    " if is_last else "│   ")))
        return drawn

    lines = []
    for root, sub_trees in hierarchy.items():
        lines.append(f"{root}/")
        lines.extend(render(sub_trees, ""))

    with open(output_path, 'w', encoding='utf-8') as file:
        file.writelines(line + "\n" for line in lines)

## Processing json files and recording information from them

In [11]:
def process_json_file(file_path, output_base_path):
    """Write the two text representations of one JSON tree.

    Reads the tree at ``file_path`` and creates in ``output_base_path``:

    * ``<name>_details.txt``: the number of nodes per role, followed by one line
      per node with its role, role_description, value, visible_bbox and, when
      the node has a 'bbox' key, bbox.
    * ``<name>_structure.txt``: one line per node with the chain of roles from
      the root to that node, joined by `` -> ``.

    Roles are made unique in traversal order: the first occurrence keeps the
    plain role, the n-th occurrence (n > 1) gets the suffix n - 1. A node
    without a role is skipped together with its children.

    Args:
        file_path: path of the JSON tree (str or Path).
        output_base_path: existing directory for the two output files.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    roles_count = {}
    detailed_info = []
    structure_info = []

    def parse_node(node, path=""):
        role = node.get('role')
        if not role:
            return

        roles_count[role] = roles_count.get(role, 0) + 1
        occurrence = roles_count[role]
        unique_role = role if occurrence == 1 else f"{role}{occurrence - 1}"

        role_description = node.get('role_description', 'None')
        value = node.get('value', 'None')
        visible_bbox = node.get('visible_bbox', 'None')
        bbox = node.get('bbox', 'None')
        detailed_info.append(f'"role":"{unique_role}", "role_description": "{role_description}", "value": "{value}", "visible_bbox": "{visible_bbox}"' + (f', "bbox": "{bbox}"' if bbox != 'None' else ''))

        # Join with "/" explicitly: os.path.join uses "\" on Windows, which the
        # "/" -> " -> " replacement below would never match.
        new_path = f"{path}/{unique_role}" if path else unique_role
        structure_info.append(new_path)

        # `or []` also covers an explicit "children": null.
        for child in node.get('children') or []:
            parse_node(child, new_path)

    parse_node(data)

    base_name = os.path.basename(file_path)

    # Role counts and per-node details share one file.
    detailed_output_path = os.path.join(output_base_path, base_name.replace('.json', '_details.txt'))
    with open(detailed_output_path, 'w', encoding='utf-8') as file:
        file.write("Roles and their counts:\n")
        for role, count in roles_count.items():
            file.write(f'"{role}": "{count}"\n')
        file.write("\nDetailed information:\n")
        for info in detailed_info:
            file.write(f'{info}\n')

    # The hierarchy goes into a second file, one root-to-node chain per line.
    structure_output_path = os.path.join(output_base_path, base_name.replace('.json', '_structure.txt'))
    with open(structure_output_path, 'w', encoding='utf-8') as file:
        for path in structure_info:
            modified_path = path.replace("/", " -> ")
            file.write(f"{modified_path}\n")

## Processing the JSON files of all screens in the test split

In [12]:
# Build the text representation of every screen in the test split.
for screen_path in df.query("split == 'test'")['screen_path']:
    json_file = screen_path.rstrip('/').replace('/', '_') + '.json'
    # `json_files_path` is the glob pattern "<clean_trees>/*.json"; joining a file name onto it
    # gives ".../*.json/<name>.json", which does not exist. Address the file through the directory.
    process_json_file(clean_trees / json_file, txt_output_path)

## LLM Answering

In [15]:
# System prompts keyed by question type. The encoding is given explicitly, like for the other
# text files read in this notebook, so the result does not depend on the platform default.
with open('../data/prompts_final.json', 'r', encoding='utf-8') as f:
    PROMPTS = json.load(f)

In [ ]:
# Client used by get_model_response; the API key is read from the environment, never hard-coded.
openai_client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

def get_model_response(question_type, question, elements_info, hierarchy_file):
    """Ask the chat model a question about one screen and return its answer text.

    Args:
        question_type: key into ``PROMPTS`` selecting the system prompt.
        question: the question about the screen.
        elements_info: text describing the screen's elements.
        hierarchy_file: text describing the element hierarchy.

    Returns:
        The message content of the first choice in the completion.

    Uses the module-level ``openai_client`` and ``PROMPTS``.
    """
    system_prompt = PROMPTS[question_type]
    user_message = f"Screen representation consists of two text files. The first let you be aware of a number of different elements, their role, role_description, value and location (visible bbox). The second represents a hierarchy of this elements. Each element has its unique role. Elements info: {elements_info}, hierarchy file: {hierarchy_file}\n\nQuestion: {question}\n\nEnsure your response reflects a deep understanding of the described UI screen."

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_message},
    ]

    # temperature=0 is passed to keep the sampling as deterministic as the API allows.
    completion = openai_client.chat.completions.create(
        model="gpt-3.5-turbo-0125",
        temperature=0,
        messages=conversation,
    )

    first_choice = completion.choices[0]
    return first_choice.message.content

In [44]:
def unzip_file(zip_filepath, dest_dir):
    """Extract every member of the ZIP archive ``zip_filepath`` into ``dest_dir``."""
    archive = zipfile.ZipFile(zip_filepath, mode='r')
    try:
        archive.extractall(path=dest_dir)
    finally:
        # Always release the file handle, also when extraction fails.
        archive.close()

# Extract representations.zip into ../data/ (the files read below are expected under
# ../data/representations/ — presumably the archive carries that top-level folder; confirm).
unzip_file('../data/representations/representations.zip', '../data/')

In [24]:
# Load the accessibility-tree markup of one screen: element details and hierarchy
with open('../data/representations/Almighty_1707228209_details.txt','r', encoding='utf-8') as f:   
    elements = f.readlines()
    
with open('../data/representations/Almighty_1707228209_structure.txt','r', encoding='utf-8') as f:
    structure = f.readlines()

In [ ]:
question_type = ''  # Answer type: selects the system prompt from PROMPTS
question = ''  # The question to ask about the screen
elements_info = str(elements)
hierarchy_file = str(structure)

# Get the answer. get_model_response uses the module-level `openai_client` itself and takes
# exactly four arguments; passing the client as an extra first argument raised a TypeError.
answer = get_model_response(
    question_type,
    question,
    elements_info,
    hierarchy_file
)
answer