# In [None]:
import json
import os
from datasets import Dataset, DatasetDict, Features, Sequence, Value, Image as HFImage

# Paths to the COCO annotation files and the image folders of each split.
train_annotations_path = r"C:\Users\lovro\Desktop\stvarne_slike_train\train\_annotations.coco.json"
train_images_folder = r"C:\Users\lovro\Desktop\stvarne_slike_train\train"

val_annotations_path = r"C:\Users\lovro\Desktop\stvarne_slike_train\valid\_annotations.coco.json"
val_images_folder = r"C:\Users\lovro\Desktop\stvarne_slike_train\valid"

# Hugging Face access token.
# SECURITY: never commit the token itself. It is read from the HF_TOKEN
# environment variable; when the variable is unset the value is None.
# NOTE(review): a token that was previously hard-coded here must be treated as
# leaked and revoked in the Hugging Face account settings.
HF_TOKEN = os.environ.get("HF_TOKEN")

def formata(annotations_path, images_folder):
    """Convert a COCO annotation file into a column-oriented dataset dict.

    Args:
        annotations_path: Path to the COCO JSON annotation file. It must
            contain an ``images`` and an ``annotations`` array.
        images_folder: Folder that is joined with each image's ``file_name``
            to build the image path.

    Returns:
        A dict with the keys ``image_id``, ``image``, ``width``, ``height``
        and ``objects``. Every value is a list holding one entry per image,
        in the order of the ``images`` array. ``image`` is the image path and
        ``objects`` is the list of annotations (``id``, ``area``, ``bbox``,
        ``category``) whose ``image_id`` matches that image; it is empty for
        an image without annotations.
    """
    # Load the COCO annotations (JSON is read explicitly as UTF-8).
    with open(annotations_path, "r", encoding="utf-8") as f:
        coco_data = json.load(f)

    # Group the annotations by the image they belong to in a single pass, so
    # that each image only receives its own objects.
    objects_by_image = {}
    for ann in coco_data["annotations"]:
        # Shift the category ids so that they run from 0 to 9: every id is
        # lowered by 2 and the resulting -1 (original id 1) is mapped to 9.
        category = ann["category_id"] - 2
        if category == -1:
            category = 9
        objects_by_image.setdefault(ann["image_id"], []).append({
            "id": ann["id"],
            "area": ann["area"],
            "bbox": ann["bbox"],
            "category": category,
        })

    # Column-oriented layout: one list per field, one entry per image.
    data_split = {"image_id": [], "image": [], "width": [], "height": [], "objects": []}

    for img in coco_data["images"]:
        image_id = img["id"]
        data_split["image_id"].append(image_id)
        data_split["image"].append(os.path.join(images_folder, img["file_name"]))
        data_split["width"].append(img["width"])
        data_split["height"].append(img["height"])
        # A fresh empty list is used for images that have no annotations.
        data_split["objects"].append(objects_by_image.get(image_id, []))

    return data_split

# Convert both COCO splits into column-oriented dicts.
train_data = formata(train_annotations_path, train_images_folder)
val_data = formata(val_annotations_path, val_images_folder)

# Schema of the Hugging Face dataset: one row per image, with the image's
# annotations stored in the "objects" sequence.
# NOTE(review): "area" is declared as int64; COCO areas can be fractional, so
# confirm that this dtype matches the annotation files.
features = Features(
    {
        "image_id": Value("int64"),
        "image": HFImage(),
        "width": Value("int64"),
        "height": Value("int64"),
        "objects": Sequence(
            feature={
                "id": Value("int64"),
                "area": Value("int64"),
                "bbox": Sequence(Value("float32")),
                "category": Value("int64"),
            }
        ),
    }
)

# Build one Dataset per split and bundle them into a DatasetDict.
train_split = Dataset.from_dict(train_data, features=features)
validation_split = Dataset.from_dict(val_data, features=features)
hf_dataset = DatasetDict({"train": train_split, "validation": validation_split})

# Upload the dataset to the Hugging Face Hub repository.
hf_dataset.push_to_hub("LovrOP/stvarni_dataset_zavrsni", token=HF_TOKEN)

print("Dataset uploadan!")
