# Prepare data for fine-tuning
Prepare the training data (painting images plus a JSONL annotation file) for fine-tuning the Open Grounding DINO model.

### 0. Import libraries and load data

In [1]:
import os
import sys
import json
import shutil

# Root of the raw dataset; the paintings copied later in this notebook are read
# from its "filtered_paintings/" subfolder.
RAW_DATA_PATH = "../../data/raw/"
# Folder holding the annotation JSON files that are loaded below.
ANNOTATIONS_PATH = "../../data/annotations/"
# Output root for the fine-tuning data ("train/" and "val/" are created inside it).
STORAGE_PATH = "../../data/fine-tuning/baseline/"
# Relative to the working directory: lets the import below resolve the module
# that lives in the sibling annotate_dataset folder.
sys.path.append("../annotate_dataset/")

# Wildcard import; load_image (used when building the records) is presumably
# provided by this module -- TODO confirm.
from annotate_paintings_utils import *

In [2]:
# Base names (without the ".json" extension) of the annotation files to merge.
filenames = [
    "annotations_10000_10009",
    "annotations_30_39",
    "annotations_100_109",
    "annotations_1000_1009",
    "annotations_2400_2409",
    "annotations_2500_2509",
    "annotations_7000_7009",
]
# Flat list of the per-painting entries gathered from every file's
# "annotations" array, in the order the files are listed above.
all_annotations = []

for filename in filenames:
    # Read explicitly as UTF-8 instead of the platform default encoding, so
    # non-ASCII labels are decoded the same way on every machine and stay
    # consistent with the UTF-8 JSONL file written at the end of the notebook.
    with open(f"{ANNOTATIONS_PATH}{filename}.json", encoding="utf-8") as f:
        all_annotations.extend(json.load(f)["annotations"])

### 1. Create data for fine-tuning in JSONL format

In [3]:
# Create the output tree. makedirs also creates STORAGE_PATH itself (and any
# missing parents), and exist_ok=True makes the cell safe to re-run and able to
# repair a partially created tree, e.g. STORAGE_PATH present but "train/" missing.
for subdirectory in ("train/", "val/"):
    os.makedirs(STORAGE_PATH + subdirectory, exist_ok=True)

In [4]:
# Build one record per painting (filename, height, width, grounding) and copy
# the matching image into the "train/" folder. Paintings for which no bounding
# box survives the filtering below are skipped entirely.
training_annotations = []

for painting_annotations in all_annotations:
    painting_id = painting_annotations["painting_id"]
    # A set gives O(1) membership tests for the per-box filter below.
    extracted_objects = set(painting_annotations["objects"])

    # Keep only the boxes whose label (bbox[0]) is one of the extracted objects;
    # bbox[2] is stored as the box itself. The meaning of the other entries of
    # each bounding-box item is not visible from this notebook.
    regions = [
        {"bbox": bbox[2], "phrase": bbox[0]}
        for bbox in painting_annotations["bounding_boxes"]
        if bbox[0] in extracted_objects
    ]

    # Nothing to ground for this painting: skip it before loading the image.
    if not regions:
        continue

    # Caption lists every kept phrase as "<phrase> ." separated by single
    # spaces, e.g. "dog . tree ." -- the same text the previous "+=" loop
    # followed by strip() produced.
    caption = " ".join(f"{region['phrase']} ." for region in regions).strip()

    # load_image comes from the wildcard import; its second return value is
    # used as the image, whose .size is read as (width, height).
    image = load_image(painting_id)[1]

    training_annotations.append(
        {
            "filename": f"{painting_id}.png",
            "height": image.size[1],
            "width": image.size[0],
            "grounding": {"caption": caption, "regions": regions},
        }
    )

    source_path = f"{RAW_DATA_PATH}filtered_paintings/{painting_id}.png"
    destination_path = f"{STORAGE_PATH}train/{painting_id}.png"
    shutil.copy2(source_path, destination_path)

In [5]:
# Serialise the collected records as JSON Lines: one JSON object per line,
# UTF-8 encoded, with non-ASCII characters written verbatim (no \u escapes).
jsonl_path = f"{STORAGE_PATH}train/train_annotations.jsonl"
with open(jsonl_path, "w", encoding='utf-8') as jsonl_file:
    jsonl_file.writelines(
        json.dumps(record, ensure_ascii=False) + '\n'
        for record in training_annotations
    )