# ImageNet (1000) labels to COCO (80) labels, with minor changes for our LPCV use case

In [None]:
import re
def replace_non_alphanumeric_with_underscore(input_string):
    """Collapse every run of non-alphanumeric characters into one underscore.

    Only ASCII letters and digits are kept as-is. Runs at the start or end
    of the string are replaced too, so ``" a--b "`` becomes ``"_a_b_"``.
    """
    non_alphanumeric_run = re.compile(r'[^a-zA-Z0-9]+')
    return non_alphanumeric_run.sub('_', input_string)
# Build a lookup from every cleaned ImageNet class name (synonym) to its ImageNet ID.
# Each non-blank line of the mapping file has three tab-separated fields:
# ImageNet ID, ImageNet label (comma-separated synonyms), COCO label.
label_to_imagenet_id = {}
with open("imagenet2coco.txt", "r", encoding="utf-8") as file:
    for line in file:
        stripped = line.strip()
        if not stripped:
            # A blank line (e.g. a trailing newline at the end of the file)
            # carries no mapping and would break the three-way unpacking below.
            continue

        imagenet_id, imagenet_label, coco_label = stripped.split("\t")

        # Build the list of possible names for this class, normalised the
        # same way for every synonym (lower case, underscores).
        class_names = [
            replace_non_alphanumeric_with_underscore(s.strip())
            for s in imagenet_label.lower().split(",")
        ]

        for name in class_names:
            # Link the class name to its ImageNet ID. If two classes share a
            # synonym, the ID from the later line overwrites the earlier one.
            label_to_imagenet_id[name] = imagenet_id
print(f" Učitano {len(label_to_imagenet_id)} klasifikacija iz 'imagenet2coco.txt'.")
print(label_to_imagenet_id)

 Učitano 1835 klasifikacija iz 'imagenet2coco.txt'.
{'tench': 'n01440764', 'tinca_tinca': 'n01440764', 'goldfish': 'n01443537', 'carassius_auratus': 'n01443537', 'great_white_shark': 'n01484850', 'white_shark': 'n01484850', 'man_eater': 'n01484850', 'man_eating_shark': 'n01484850', 'carcharodon_carcharias': 'n01484850', 'tiger_shark': 'n01491361', 'galeocerdo_cuvieri': 'n01491361', 'hammerhead': 'n01494475', 'hammerhead_shark': 'n01494475', 'electric_ray': 'n01496331', 'crampfish': 'n01496331', 'numbfish': 'n01496331', 'torpedo': 'n01496331', 'stingray': 'n01498041', 'cock': 'n01514668', 'hen': 'n01514859', 'ostrich': 'n01518878', 'struthio_camelus': 'n01518878', 'brambling': 'n01530575', 'fringilla_montifringilla': 'n01530575', 'goldfinch': 'n01531178', 'carduelis_carduelis': 'n01531178', 'house_finch': 'n01532829', 'linnet': 'n01532829', 'carpodacus_mexicanus': 'n01532829', 'junco': 'n01534433', 'snowbird': 'n01534433', 'indigo_bunting': 'n01537544', 'indigo_finch': 'n01537544', 'ind

In [None]:
import os
import json

# Path of the folder that holds the images (one sub-folder per class).
imagenet_folder = "/home/centar15-desktop1/LPCV_2025_T1/datasets/imagenet/archive"

# One record per image: its path relative to imagenet_folder
# ("<class_folder>/<file>.jpg") and the ImageNet ID looked up from the
# lower-cased class folder name.
annotations = []
# Class folders whose name is not a key of label_to_imagenet_id.
unmapped_folders = []

# os.listdir returns entries in arbitrary order; sorting keeps the generated
# annotation list identical from run to run.
for class_folder in sorted(os.listdir(imagenet_folder)):
    class_path = os.path.join(imagenet_folder, class_folder)
    if not os.path.isdir(class_path):
        continue

    class_folder_clean = class_folder.lower()
    imagenet_id = label_to_imagenet_id.get(class_folder_clean)
    if imagenet_id is None:
        # Images of such folders are still recorded, with a null ID.
        unmapped_folders.append(class_folder)

    for filename in sorted(os.listdir(class_path)):
        if filename.endswith(".jpg"):
            image_path = os.path.join(class_folder, filename)
            annotations.append({"image_location": image_path, "image_net_id": imagenet_id})

if unmapped_folders:
    print(f" Warning: {len(unmapped_folders)} class folder(s) have no ImageNet ID: {unmapped_folders}")

# Save the annotations to a JSON file.
with open("imagenet_annotations.json", "w") as f:
    json.dump(annotations, f, indent=4)

print(f" Generisano {len(annotations)} ImageNet anotacija u 'imagenet_annotations.json'")

In [None]:
import json
# 1. Load the ImageNet -> COCO map
def load_imagenet2coco_map(map_file="imagenet2coco.txt"):
    """Read the tab-separated mapping file into an ImageNet-ID -> COCO-label dict.

    Every non-blank line must hold exactly three tab-separated fields:
    ImageNet ID, ImageNet label and COCO label. A COCO label equal to the
    literal string ``"None"`` is stored as ``None``. Blank lines are skipped.

    Args:
        map_file: Path of the mapping file.

    Returns:
        dict: ImageNet ID (str) -> COCO label (str), or ``None`` when the
        class has no COCO equivalent.

    Raises:
        ValueError: If a non-blank line does not have exactly three fields.
    """
    imagenet2coco = {}
    with open(map_file, "r", encoding="utf-8") as file:
        for line_number, line in enumerate(file, start=1):
            stripped = line.strip()
            if not stripped:
                # e.g. a trailing empty line at the end of the file
                continue
            fields = stripped.split("\t")
            if len(fields) != 3:
                raise ValueError(
                    f"{map_file}:{line_number}: expected 3 tab-separated fields, got {len(fields)}"
                )
            imagenet_id, _imagenet_label, coco_label = fields
            imagenet2coco[imagenet_id] = coco_label if coco_label != "None" else None
    return imagenet2coco

# 2. Load the ImageNet annotations
def load_imagenet_annotations(input_json="imagenet_annotations.json"):
    """Return the parsed content of the JSON file at ``input_json``."""
    with open(input_json, "r") as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

# 3. Map ImageNet -> COCO
def convert_imagenet_to_coco(imagenet_annotations, imagenet2coco):
    """Keep only the annotations whose ImageNet ID has a COCO equivalent.

    Args:
        imagenet_annotations: Iterable of dicts with the keys
            "image_location" and "image_net_id".
        imagenet2coco: Mapping from ImageNet ID to COCO label; a missing or
            falsy label means "no COCO equivalent".

    Returns:
        list of dicts with the keys "image_location" and "coco_label", in
        input order.
    """
    # Pair each annotation with its looked-up label lazily, then filter.
    labelled = (
        (entry, imagenet2coco.get(entry["image_net_id"]))
        for entry in imagenet_annotations
    )
    return [
        {"image_location": entry["image_location"], "coco_label": label}
        for entry, label in labelled
        if label
    ]

# 4. Conversion to the COCO JSON format

def save_coco_json(coco_annotations, output_json="coco_annotations.json"):
    """Write the annotations to ``output_json`` as ``{"images": [...]}``.

    Each image entry gets a sequential "id" (its position in
    ``coco_annotations``), a "file_name" taken from "image_location" and the
    annotation's "coco_label".
    """
    images = []
    for index, entry in enumerate(coco_annotations):
        images.append({
            "id": index,
            "file_name": entry["image_location"],
            "coco_label": entry["coco_label"],
        })

    with open(output_json, "w") as out:
        json.dump({"images": images}, out, indent=4)


# Run the pipeline: mapping file + ImageNet annotations -> COCO-labelled image list.
imagenet2coco = load_imagenet2coco_map(map_file="imagenet2coco.txt")
imagenet_annotations = load_imagenet_annotations(input_json="imagenet_annotations.json")
coco_annotations = convert_imagenet_to_coco(
    imagenet_annotations=imagenet_annotations,
    imagenet2coco=imagenet2coco,
)
save_coco_json(coco_annotations=coco_annotations, output_json="coco_annotations.json")
print(" Konverzija završena! Rezultat je sačuvan u 'coco_annotations.json'")

✅ Konverzija završena! Rezultat je sačuvan u 'coco_annotations.json'


In [None]:
import os
import json
import shutil

# Path to the annotation file written by the conversion step ({"images": [...]}).
json_file_path = "coco_annotations.json"  # Replace with the correct path to your JSON file

# Root folder of the original ImageNet dataset; "file_name" entries are relative to it.
imagenet_root = "/home/centar15-desktop1/LPCV_2025_T1/datasets/imagenet/archive"  # Replace with the root folder of your ImageNet dataset

# Root folder of the regrouped dataset: one sub-folder per COCO label.
new_dataset_root = "/home/centar15-desktop1/LPCV_2025_T1/datasets/imagenet/coco_80"  # New dataset folder

# Create the new dataset folder.
os.makedirs(new_dataset_root, exist_ok=True)

# Load the JSON file.
with open(json_file_path, "r") as f:
    annotations = json.load(f)

# Running counter used as the destination file name. Source names such as
# "178.jpg" repeat across class folders that end up in the same COCO folder,
# so the original base name cannot be reused.
i = 0

# Reorganize the images into COCO class folders.
for annotation in annotations["images"]:
    file_name = annotation["file_name"]  # e.g., "schipperke/178.jpg"
    coco_label = annotation["coco_label"]  # e.g., "dog"

    # Skip entries without a COCO label.
    if coco_label is None:
        continue

    # Source image path.
    source_path = os.path.join(imagenet_root, file_name)

    # Destination folder for the COCO class.
    class_folder = os.path.join(new_dataset_root, coco_label)
    os.makedirs(class_folder, exist_ok=True)

    # Destination image path: zero-padded counter plus the source file's
    # extension, so the copy is still recognisable as an image file.
    extension = os.path.splitext(file_name)[1]
    dest_path = os.path.join(class_folder, f"{i:010}{extension}")

    # Copy the image (with metadata); report sources that are missing.
    if os.path.exists(source_path):
        shutil.copy2(source_path, dest_path)
    else:
        print(f" File not found: {source_path}")

    # The counter advances for missing files too, so numbering may have gaps.
    i = i+1

print(f" New dataset created at '{new_dataset_root}' with 80 COCO classes!")