In [1]:
import os
import shutil
import json
import random
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection

In [2]:
os.environ["TRANSFORMERS_NO_TF"] = "1"
model_id  = "IDEA-Research/grounding-dino-base"
device    = "mps"
processor = AutoProcessor.from_pretrained(model_id)
model     = AutoModelForZeroShotObjectDetection.from_pretrained(model_id).to(device)


In [3]:
# Paths & Splits
dataset_root    = "/Users/ramana/k/dataset"
images_in_root  = "/Users/ramana/k/Foodies_Challenge/images"
split_names     = ["train", "val", "test"]

# Ensure output directories
for split in split_names:
    os.makedirs(os.path.join(dataset_root, "images", split), exist_ok=True)
    os.makedirs(os.path.join(dataset_root, "labels", split), exist_ok=True)
os.makedirs(os.path.join(dataset_root, "annotations"), exist_ok=True)

# --- Discover classes and build mappings ---
dirs     = os.listdir(images_in_root)
classes  = sorted(d for d in dirs if os.path.isdir(os.path.join(images_in_root, d)))
categories   = [{"id": i+1, "name": cls} for i, cls in enumerate(classes)]
class_to_id  = {cls: i+1 for i, cls in enumerate(classes)}

In [4]:
# --- Gather per-class splits ---
splits = {split: [] for split in split_names}
for cls in classes:
    cls_folder = os.path.join(images_in_root, cls)
    images = [fname for fname in os.listdir(cls_folder) if fname.lower().endswith((".jpg", ".jpeg", ".png"))]
    images.sort()
    random.seed(42)
    random.shuffle(images)
    n = len(images)
    n_train = int(0.7 * n)
    n_val = int(0.15 * n)
    n_test = n - n_train - n_val
    splits["train"].extend([{"class": cls, "filename": fname, "path": os.path.join(cls_folder, fname)} for fname in images[:n_train]])
    splits["val"].extend([{"class": cls, "filename": fname, "path": os.path.join(cls_folder, fname)} for fname in images[n_train:n_train+n_val]])
    splits["test"].extend([{"class": cls, "filename": fname, "path": os.path.join(cls_folder, fname)} for fname in images[n_train+n_val:]])

In [5]:
for split in split_names:
    print(f"Processing split: {split}")
    imgs_out      = os.path.join(dataset_root, "images", split)
    lbls_out      = os.path.join(dataset_root, "labels", split)
    coco_ann_file = os.path.join(dataset_root, "annotations", f"instances_{split}.json")

    # Load or init COCO
    if os.path.exists(coco_ann_file):
        with open(coco_ann_file, "r") as f:
            coco = json.load(f)
        next_image_id = max(img["id"] for img in coco["images"]) + 1 if coco["images"] else 1
        next_ann_id   = max(ann["id"] for ann in coco["annotations"]) + 1 if coco["annotations"] else 1
    else:
        coco = {"images": [], "annotations": [], "categories": categories}
        next_image_id = 1
        next_ann_id   = 1

    processed = {img["file_name"] for img in coco["images"]}

    for img_info in splits[split]:
        fname = img_info["filename"]
        cls = img_info["class"]
        src = img_info["path"]
        dst = os.path.join(imgs_out, fname)
        if fname in processed:
            continue

        shutil.copy(src, dst)

        # Inference
        image = Image.open(src)
        text  = cls.replace("_", " ") + "."
        inputs = processor(images=image, text=text, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = model(**inputs)

        result = processor.post_process_grounded_object_detection(
            outputs,
            inputs.input_ids,
            box_threshold=0.4,
            text_threshold=0.3,
            target_sizes=[image.size[::-1]]
        )[0]

        # Record image
        w, h = image.size
        coco["images"].append({
            "id": next_image_id,
            "file_name": fname,
            "width": w,
            "height": h
        })

        # Annotations + YOLO labels
        yolo_lines = []
        cid = class_to_id[cls]
        for box in result["boxes"]:
            x0, y0, x1, y1 = box.tolist()
            bw, bh = x1 - x0, y1 - y0

            coco["annotations"].append({
                "id": next_ann_id,
                "image_id": next_image_id,
                "category_id": cid,
                "bbox": [x0, y0, bw, bh],
                "area": bw * bh,
                "iscrowd": 0
            })
            next_ann_id += 1

            xc, yc = x0 + bw/2, y0 + bh/2
            yolo_lines.append(f"{cid-1} {xc/w:.6f} {yc/h:.6f} {bw/w:.6f} {bh/h:.6f}")

        stem = os.path.splitext(fname)[0]
        with open(os.path.join(lbls_out, f"{stem}.txt"), "w") as f:
            f.write("\n".join(yolo_lines))

        next_image_id += 1
        processed.add(fname)

        # Checkpoint COCO JSON
        with open(coco_ann_file, "w") as f:
            json.dump(coco, f, indent=2)

    print(f"Completed split: {split}. JSON saved to {coco_ann_file}\n")

print("All splits processed.")

Processing split: train
Completed split: train. JSON saved to /Users/ramana/k/dataset/annotations/instances_train.json

Processing split: val
Completed split: val. JSON saved to /Users/ramana/k/dataset/annotations/instances_val.json

Processing split: test
Completed split: test. JSON saved to /Users/ramana/k/dataset/annotations/instances_test.json

All splits processed.


In [10]:
import json
from collections import Counter

# Path to your annotation file
annotation_file = "/Users/ramana/k/dataset/annotations/instances_train.json"

# Load the annotation file
with open(annotation_file, "r") as f:
    coco = json.load(f)

# Count occurrences of each image_id in annotations
image_id_counts = Counter(ann["image_id"] for ann in coco["annotations"])

# Find image_ids with more than one annotation
duplicate_image_ids = [img_id for img_id, count in image_id_counts.items() if count > 1]

# Number of such image_ids (i.e., number of pairs/sets)
num_duplicate_image_ids = len(duplicate_image_ids)

print(f"Number of image_ids with more than one annotation in train: {num_duplicate_image_ids}")
# Optionally, print the image_ids
#print("Duplicate image_ids:", duplicate_image_ids)

# Path to your annotation file
annotation_file = "/Users/ramana/k/dataset/annotations/instances_test.json"

# Load the annotation file
with open(annotation_file, "r") as f:
    coco = json.load(f)

# Count occurrences of each image_id in annotations
image_id_counts = Counter(ann["image_id"] for ann in coco["annotations"])

# Find image_ids with more than one annotation
duplicate_image_ids = [img_id for img_id, count in image_id_counts.items() if count > 1]

# Number of such image_ids (i.e., number of pairs/sets)
num_duplicate_image_ids = len(duplicate_image_ids)

print(f"Number of image_ids with more than one annotation in test: {num_duplicate_image_ids}")
# Optionally, print the image_ids
#print("Duplicate image_ids:", duplicate_image_ids)

# Path to your annotation file
annotation_file = "/Users/ramana/k/dataset/annotations/instances_val.json"

# Load the annotation file
with open(annotation_file, "r") as f:
    coco = json.load(f)

# Count occurrences of each image_id in annotations
image_id_counts = Counter(ann["image_id"] for ann in coco["annotations"])

# Find image_ids with more than one annotation
duplicate_image_ids = [img_id for img_id, count in image_id_counts.items() if count > 1]

# Number of such image_ids (i.e., number of pairs/sets)
num_duplicate_image_ids = len(duplicate_image_ids)

print(f"Number of image_ids with more than one annotation in val: {num_duplicate_image_ids}")
# Optionally, print the image_ids
#print("Duplicate image_ids:", duplicate_image_ids)

Number of image_ids with more than one annotation in train: 0
Number of image_ids with more than one annotation in test: 0
Number of image_ids with more than one annotation in val: 0


In [9]:
import json
import os

# Path to your annotation file (modify as needed)
annotation_file = "/Users/ramana/k/dataset/annotations/instances_train.json"

# Load the annotation file
with open(annotation_file, "r") as f:
    coco = json.load(f)

# Build a count of annotations per image_id
from collections import Counter, defaultdict

image_id_count = Counter()
for ann in coco["annotations"]:
    image_id_count[ann["image_id"]] += 1

# Find image_ids that have >1 annotation
duplicate_image_ids = {img_id for img_id, count in image_id_count.items() if count > 1}

# Filter out all annotations with a duplicate image_id
filtered_annotations = [ann for ann in coco["annotations"] if ann["image_id"] not in duplicate_image_ids]

# Optionally, you may want to filter images as well, so that images with no annotations are removed:
filtered_image_ids = {ann["image_id"] for ann in filtered_annotations}
filtered_images = [img for img in coco["images"] if img["id"] in filtered_image_ids]

# Update the coco dict
coco["annotations"] = filtered_annotations
coco["images"] = filtered_images

# Save the modified annotation file (back up original first!)
backup_file = annotation_file + ".bak"
os.rename(annotation_file, backup_file)
with open(annotation_file, "w") as f:
    json.dump(coco, f, indent=2)

print(f"Processed. Original file backed up as {backup_file}")


# Path to your annotation file (modify as needed)
annotation_file = "/Users/ramana/k/dataset/annotations/instances_test.json"

# Load the annotation file
with open(annotation_file, "r") as f:
    coco = json.load(f)

# Build a count of annotations per image_id
from collections import Counter, defaultdict

image_id_count = Counter()
for ann in coco["annotations"]:
    image_id_count[ann["image_id"]] += 1

# Find image_ids that have >1 annotation
duplicate_image_ids = {img_id for img_id, count in image_id_count.items() if count > 1}

# Filter out all annotations with a duplicate image_id
filtered_annotations = [ann for ann in coco["annotations"] if ann["image_id"] not in duplicate_image_ids]

# Optionally, you may want to filter images as well, so that images with no annotations are removed:
filtered_image_ids = {ann["image_id"] for ann in filtered_annotations}
filtered_images = [img for img in coco["images"] if img["id"] in filtered_image_ids]

# Update the coco dict
coco["annotations"] = filtered_annotations
coco["images"] = filtered_images

# Save the modified annotation file (back up original first!)
backup_file = annotation_file + ".bak"
os.rename(annotation_file, backup_file)
with open(annotation_file, "w") as f:
    json.dump(coco, f, indent=2)

print(f"Processed. Original file backed up as {backup_file}")


# Path to your annotation file (modify as needed)
annotation_file = "/Users/ramana/k/dataset/annotations/instances_val.json"

# Load the annotation file
with open(annotation_file, "r") as f:
    coco = json.load(f)

# Build a count of annotations per image_id
from collections import Counter, defaultdict

image_id_count = Counter()
for ann in coco["annotations"]:
    image_id_count[ann["image_id"]] += 1

# Find image_ids that have >1 annotation
duplicate_image_ids = {img_id for img_id, count in image_id_count.items() if count > 1}

# Filter out all annotations with a duplicate image_id
filtered_annotations = [ann for ann in coco["annotations"] if ann["image_id"] not in duplicate_image_ids]

# Optionally, you may want to filter images as well, so that images with no annotations are removed:
filtered_image_ids = {ann["image_id"] for ann in filtered_annotations}
filtered_images = [img for img in coco["images"] if img["id"] in filtered_image_ids]

# Update the coco dict
coco["annotations"] = filtered_annotations
coco["images"] = filtered_images

# Save the modified annotation file (back up original first!)
backup_file = annotation_file + ".bak"
os.rename(annotation_file, backup_file)
with open(annotation_file, "w") as f:
    json.dump(coco, f, indent=2)

print(f"Processed. Original file backed up as {backup_file}")

Processed. Original file backed up as /Users/ramana/k/dataset/annotations/instances_train.json.bak
Processed. Original file backed up as /Users/ramana/k/dataset/annotations/instances_test.json.bak
Processed. Original file backed up as /Users/ramana/k/dataset/annotations/instances_val.json.bak


In [23]:
import os
import json
import glob

# Path Setup
ANNOT_DIR = "/Users/ramana/k/dataset/backup"
LABEL_DIR = "/Users/ramana/k/dataset/labels"
SPLITS = ["train", "val", "test"]

def fix_coco_json(json_path):
    with open(json_path, "r") as f:
        coco = json.load(f)

    # Build old-to-new category ID mapping (if needed)
    old_ids = sorted(cat["id"] for cat in coco["categories"])
    id_map = {old_id: new_id for new_id, old_id in enumerate(old_ids)}

    # Update category IDs in categories
    for cat in coco["categories"]:
        cat["id"] = id_map[cat["id"]]

    # Update category_id in annotations
    for ann in coco["annotations"]:
        ann["category_id"] = id_map[ann["category_id"]]

    with open(json_path, "w") as f:
        json.dump(coco, f, indent=2)
    print(f"Updated COCO JSON: {json_path}")

def fix_yolo_labels(label_dir, id_map):
    txt_files = glob.glob(os.path.join(label_dir, "*.txt"))
    for txt_file in txt_files:
        with open(txt_file, "r") as f:
            lines = f.readlines()
        new_lines = []
        for line in lines:
            if not line.strip():
                continue
            parts = line.split()
            old_cls_id = int(parts[0])
            new_cls_id = id_map[old_cls_id]
            new_line = " ".join([str(new_cls_id)] + parts[1:]) + "\n"
            new_lines.append(new_line)
        with open(txt_file, "w") as f:
            f.writelines(new_lines)
    print(f"Updated YOLO labels in {label_dir}")

# --- MAIN ---

# Step 1: Fix COCO JSON files and build the class ID map
id_map = None
for split in SPLITS:
    json_path = os.path.join(ANNOT_DIR, f"instances_{split}.json")
    with open(json_path, "r") as f:
        coco = json.load(f)
    # Only need to build id_map once
    if id_map is None:
        old_ids = sorted(cat["id"] for cat in coco["categories"])
        id_map = {old_id: new_id for new_id, old_id in enumerate(old_ids)}
    fix_coco_json(json_path)

# Step 2: Fix YOLO label files
for split in SPLITS:
    label_dir = os.path.join(LABEL_DIR, split)
    fix_yolo_labels(label_dir, id_map)

Updated COCO JSON: /Users/ramana/k/dataset/backup/instances_train.json
Updated COCO JSON: /Users/ramana/k/dataset/backup/instances_val.json
Updated COCO JSON: /Users/ramana/k/dataset/backup/instances_test.json


KeyError: 0

In [24]:
import json
print("====================================train====================================")
# Path to your COCO JSON annotation file
json_path = "/Users/ramana/k/dataset/annotations/instances_train.json"  # Change as needed

with open(json_path, "r") as f:
    data = json.load(f)

categories = data["categories"]
images = data["images"]

# Build a mapping: category name -> count of images containing it in their filename
category_counts = {}

for cat in categories:
    name = cat["name"]
    count = sum(1 for img in images if name in img["file_name"])
    category_counts[name] = count

# Output results
for name, count in category_counts.items():
    print(f"{name}: {count} images")


print("====================================test====================================")

json_path = "/Users/ramana/k/dataset/annotations/instances_test.json"  # Change as needed

with open(json_path, "r") as f:
    data = json.load(f)

categories = data["categories"]
images = data["images"]

# Build a mapping: category name -> count of images containing it in their filename
category_counts = {}

for cat in categories:
    name = cat["name"]
    count = sum(1 for img in images if name in img["file_name"])
    category_counts[name] = count

# Output results
for name, count in category_counts.items():
    print(f"{name}: {count} images")

print("====================================val====================================")

json_path = "/Users/ramana/k/dataset/annotations/instances_val.json"  # Change as needed

with open(json_path, "r") as f:
    data = json.load(f)

categories = data["categories"]
images = data["images"]

# Build a mapping: category name -> count of images containing it in their filename
category_counts = {}

for cat in categories:
    name = cat["name"]
    count = sum(1 for img in images if name in img["file_name"])
    category_counts[name] = count

# Output results
for name, count in category_counts.items():
    print(f"{name}: {count} images")

Aloo_matar: 392 images
Besan_cheela: 488 images
Biryani: 480 images
Chapathi: 449 images
Chole_bature: 319 images
Dahl: 174 images
Dhokla: 416 images
Dosa: 322 images
Gulab_jamun: 181 images
Idli: 254 images
Jalebi: 506 images
Kadai_paneer: 467 images
Naan: 455 images
Paani_puri: 350 images
Pakoda: 435 images
Pav_bhaji: 372 images
Poha: 513 images
Rolls: 225 images
Samosa: 268 images
Vada_pav: 459 images
Aloo_matar: 86 images
Besan_cheela: 110 images
Biryani: 106 images
Chapathi: 93 images
Chole_bature: 77 images
Dahl: 34 images
Dhokla: 81 images
Dosa: 73 images
Gulab_jamun: 40 images
Idli: 54 images
Jalebi: 109 images
Kadai_paneer: 104 images
Naan: 94 images
Paani_puri: 71 images
Pakoda: 100 images
Pav_bhaji: 79 images
Poha: 111 images
Rolls: 42 images
Samosa: 64 images
Vada_pav: 93 images
Aloo_matar: 88 images
Besan_cheela: 107 images
Biryani: 108 images
Chapathi: 107 images
Chole_bature: 69 images
Dahl: 36 images
Dhokla: 92 images
Dosa: 65 images
Gulab_jamun: 32 images
Idli: 63 imag

In [21]:
import json

splits = ["train", "test", "val"]
base_path = "/Users/ramana/k/dataset/annotations"
total_images = 0
split_counts = {}

for split in splits:
    json_path = f"{base_path}/instances_{split}.json"
    with open(json_path, "r") as f:
        data = json.load(f)
    n_images = len(data["images"])
    split_counts[split] = n_images
    total_images += n_images

for split, count in split_counts.items():
    print(f"{split}: {count} images")
print(f"TOTAL: {total_images} images")

train: 7525 images
test: 1621 images
val: 1631 images
TOTAL: 10777 images


In [25]:
import os
import json
from PIL import Image, ImageDraw, ImageFont
from collections import defaultdict

dataset_root = "/Users/ramana/k/dataset"
split_names = ["train", "val", "test"]

# (Optional) font for class labels
try:
    font = ImageFont.truetype("arial.ttf", 20)
except:
    font = ImageFont.load_default()

for split in split_names:
    images_dir = os.path.join(dataset_root, "images", split)
    coco_ann_file = os.path.join(dataset_root, "annotations", f"instances_{split}.json")
    output_dir = os.path.join(dataset_root, f"images_with_boxes_{split}")
    os.makedirs(output_dir, exist_ok=True)

    if not os.path.exists(coco_ann_file):
        print(f"COCO annotation file not found: {coco_ann_file}. Skipping split '{split}'.")
        continue

    with open(coco_ann_file, "r") as f:
        coco = json.load(f)

    image_id_to_file = {img['id']: img['file_name'] for img in coco['images']}
    cat_id_to_name = {cat['id']: cat['name'] for cat in coco['categories']}
    anns_by_img = defaultdict(list)
    for ann in coco['annotations']:
        anns_by_img[ann['image_id']].append(ann)

    for image_id, filename in image_id_to_file.items():
        img_path = os.path.join(images_dir, filename)
        if not os.path.exists(img_path):
            print(f"Image file {img_path} not found, skipping.")
            continue

        image = Image.open(img_path).convert("RGB")
        draw = ImageDraw.Draw(image)

        for ann in anns_by_img[image_id]:
            bbox = ann['bbox']  # [x0, y0, w, h]
            x0, y0, w, h = bbox
            x1, y1 = x0 + w, y0 + h
            cat_name = cat_id_to_name.get(ann["category_id"], "unknown")

            # Draw rectangle
            draw.rectangle([x0, y0, x1, y1], outline="red", width=3)
            # Draw label
            label = f"{cat_name}"

            # --- FIX: Get text size ---
            try:
                # Newer Pillow: use textbbox for accurate sizing
                text_bbox = draw.textbbox((x0, y0), label, font=font)
                text_w, text_h = text_bbox[2] - text_bbox[0], text_bbox[3] - text_bbox[1]
            except AttributeError:
                # Fallback for older Pillow
                text_w, text_h = font.getsize(label)

            draw.rectangle([x0, y0 - text_h, x0 + text_w, y0], fill="red")
            draw.text((x0, y0 - text_h), label, fill="white", font=font)

        out_path = os.path.join(output_dir, filename)
        image.save(out_path)
        print(f"Saved: {out_path}")

    print(f"All images with bounding boxes for '{split}' saved to: {output_dir}")

print("All splits processed.")

Saved: /Users/ramana/k/dataset/images_with_boxes_train/Aloo_matar506.jpg
Saved: /Users/ramana/k/dataset/images_with_boxes_train/Aloo_matar663.jpg
Saved: /Users/ramana/k/dataset/images_with_boxes_train/Aloo_matar520.jpg
Saved: /Users/ramana/k/dataset/images_with_boxes_train/Aloo_matar286.jpg
Saved: /Users/ramana/k/dataset/images_with_boxes_train/Aloo_matar451.jpg
Saved: /Users/ramana/k/dataset/images_with_boxes_train/Aloo_matar624.jpg
Saved: /Users/ramana/k/dataset/images_with_boxes_train/Aloo_matar715.jpg
Saved: /Users/ramana/k/dataset/images_with_boxes_train/Aloo_matar661.jpg
Saved: /Users/ramana/k/dataset/images_with_boxes_train/Aloo_matar463.jpg
Saved: /Users/ramana/k/dataset/images_with_boxes_train/Aloo_matar351.jpg
Saved: /Users/ramana/k/dataset/images_with_boxes_train/Aloo_matar305.jpg
Saved: /Users/ramana/k/dataset/images_with_boxes_train/Aloo_matar759.jpg
Saved: /Users/ramana/k/dataset/images_with_boxes_train/Aloo_matar383.jpg
Saved: /Users/ramana/k/dataset/images_with_boxes_tr

In [2]:
import os

# Paths
dataset_root = "/Users/ramana/k/dataset"
splits = ["train", "val", "test"]

# Extensions to match image files
image_extensions = [".jpg", ".jpeg", ".png"]

for split in splits:
    images_dir = os.path.join(dataset_root, "images", split)
    labels_dir = os.path.join(dataset_root, "labels", split)

    for fname in os.listdir(labels_dir):
        if not fname.endswith(".txt"):
            continue

        label_path = os.path.join(labels_dir, fname)

        # Check if label file is empty
        if os.stat(label_path).st_size == 0:
            # Corresponding image file stem
            stem = os.path.splitext(fname)[0]

            # Try deleting all possible image file variants
            for ext in image_extensions:
                image_path = os.path.join(images_dir, stem + ext)
                if os.path.exists(image_path):
                    os.remove(image_path)
                    print(f"Deleted image: {image_path}")
                    break  # Only one match needed

            # Delete the label file itself
            os.remove(label_path)
            print(f"Deleted empty label: {label_path}")

print("Cleanup completed.")


Deleted image: /Users/ramana/k/dataset/images/train/Pav_bhaji647.jpg
Deleted empty label: /Users/ramana/k/dataset/labels/train/Pav_bhaji647.txt
Deleted image: /Users/ramana/k/dataset/images/train/Dahl688.jpg
Deleted empty label: /Users/ramana/k/dataset/labels/train/Dahl688.txt
Deleted image: /Users/ramana/k/dataset/images/train/Idli134.jpg
Deleted empty label: /Users/ramana/k/dataset/labels/train/Idli134.txt
Deleted image: /Users/ramana/k/dataset/images/train/Dahl111.jpg
Deleted empty label: /Users/ramana/k/dataset/labels/train/Dahl111.txt
Deleted image: /Users/ramana/k/dataset/images/train/Gulab_jamun358.jpg
Deleted empty label: /Users/ramana/k/dataset/labels/train/Gulab_jamun358.txt
Deleted image: /Users/ramana/k/dataset/images/train/Aloo_matar159.jpg
Deleted empty label: /Users/ramana/k/dataset/labels/train/Aloo_matar159.txt
Deleted image: /Users/ramana/k/dataset/images/train/Idli108.jpg
Deleted empty label: /Users/ramana/k/dataset/labels/train/Idli108.txt
Deleted image: /Users/rama

In [3]:
import os
from collections import defaultdict

# Path to search
root_dir = "/Users/ramana/k/dataset/images"

# Provided category names
categories = [
    "Aloo_matar", "Besan_cheela", "Biryani", "Chapathi", "Chole_bature",
    "Dahl", "Dhokla", "Dosa", "Gulab_jamun", "Idli", "Jalebi", "Kadai_paneer",
    "Naan", "Paani_puri", "Pakoda", "Pav_bhaji", "Poha", "Rolls", "Samosa", "Vada_pav"
]

# Normalize name function
def normalize(name):
    return name.lower().replace("_", "").replace("-", "")

# Prepare normalized categories
normalized_categories = {normalize(name): name for name in categories}
category_counts = defaultdict(int)

# Go through each split
for split in ["train", "val", "test"]:
    split_dir = os.path.join(root_dir, split)
    for fname in os.listdir(split_dir):
        stem, _ = os.path.splitext(fname)
        norm_stem = normalize(stem)

        # Check if stem starts with any normalized category
        for norm_cat, orig_cat in normalized_categories.items():
            if norm_stem.startswith(norm_cat):
                category_counts[orig_cat] += 1
                break

# Print result
for cat in categories:
    count = category_counts[cat]
    print(f"{cat}: {count} file(s)")


Aloo_matar: 566 file(s)
Besan_cheela: 715 file(s)
Biryani: 696 file(s)
Chapathi: 734 file(s)
Chole_bature: 668 file(s)
Dahl: 247 file(s)
Dhokla: 660 file(s)
Dosa: 654 file(s)
Gulab_jamun: 609 file(s)
Idli: 396 file(s)
Jalebi: 740 file(s)
Kadai_paneer: 688 file(s)
Naan: 695 file(s)
Paani_puri: 586 file(s)
Pakoda: 742 file(s)
Pav_bhaji: 551 file(s)
Poha: 735 file(s)
Rolls: 725 file(s)
Samosa: 647 file(s)
Vada_pav: 683 file(s)


In [10]:
import os
from collections import defaultdict


# Dataset images root
dataset_root = "/Users/ramana/k/dataset-2-2/images"

# Provided category names
categories = [
    "Aloo_matar", "Besan_cheela", "Biryani", "Chapathi", "Chole_bature",
    "Dahl", "Dhokla", "Dosa", "Gulab_jamun", "Idli", "Jalebi", "Kadai_paneer",
    "Naan", "Paani_puri", "Pakoda", "Pav_bhaji", "Poha", "Rolls", "Samosa", "Vada_pav"
]

# Normalize function
def normalize(name):
    return name.lower().replace("_", "").replace("-", "")

# Build normalized category mapping
normalized_categories = {normalize(cat): cat for cat in categories}

# For each split
splits = ["train", "val", "test"]

for split in splits:
    print(f"\n🔹 Split: {split}")
    split_dir = os.path.join(dataset_root, split)
    cat_counts = defaultdict(int)

    for fname in os.listdir(split_dir):
        if not fname.lower().endswith((".jpg", ".jpeg", ".png")):
            continue

        stem = os.path.splitext(fname)[0]
        norm_stem = normalize(stem)

        for norm_cat, orig_cat in normalized_categories.items():
            if norm_stem.startswith(norm_cat):
                cat_counts[orig_cat] += 1
                break

    for cat in categories:
        print(f"{cat}: {cat_counts[cat]} file(s)")



🔹 Split: train
Aloo_matar: 392 file(s)
Besan_cheela: 497 file(s)
Biryani: 481 file(s)
Chapathi: 514 file(s)
Chole_bature: 459 file(s)
Dahl: 176 file(s)
Dhokla: 465 file(s)
Dosa: 460 file(s)
Gulab_jamun: 427 file(s)
Idli: 270 file(s)
Jalebi: 516 file(s)
Kadai_paneer: 478 file(s)
Naan: 492 file(s)
Paani_puri: 413 file(s)
Pakoda: 522 file(s)
Pav_bhaji: 382 file(s)
Poha: 514 file(s)
Rolls: 504 file(s)
Samosa: 449 file(s)
Vada_pav: 484 file(s)

🔹 Split: val
Aloo_matar: 88 file(s)
Besan_cheela: 108 file(s)
Biryani: 108 file(s)
Chapathi: 111 file(s)
Chole_bature: 104 file(s)
Dahl: 37 file(s)
Dhokla: 96 file(s)
Dosa: 93 file(s)
Gulab_jamun: 91 file(s)
Idli: 65 file(s)
Jalebi: 113 file(s)
Kadai_paneer: 105 file(s)
Naan: 101 file(s)
Paani_puri: 91 file(s)
Pakoda: 108 file(s)
Pav_bhaji: 86 file(s)
Poha: 110 file(s)
Rolls: 111 file(s)
Samosa: 96 file(s)
Vada_pav: 102 file(s)

🔹 Split: test
Aloo_matar: 86 file(s)
Besan_cheela: 110 file(s)
Biryani: 107 file(s)
Chapathi: 109 file(s)
Chole_bature: 10

In [6]:
import os
import random

# Paths
dataset_root = "/Users/ramana/k/dataset-2"
splits = {"train": 150, "val": 25, "test": 25}

# Categories
categories = [
    "Aloo_matar", "Besan_cheela", "Biryani", "Chapathi", "Chole_bature",
    "Dahl", "Dhokla", "Dosa", "Gulab_jamun", "Idli", "Jalebi", "Kadai_paneer",
    "Naan", "Paani_puri", "Pakoda", "Pav_bhaji", "Poha", "Rolls", "Samosa", "Vada_pav"
]

def normalize(name):
    return name.lower().replace("_", "").replace("-", "")

norm_cat_map = {normalize(cat): cat for cat in categories}

image_exts = [".jpg", ".jpeg", ".png"]

for split, keep_count in splits.items():
    print(f"\n🔹 Trimming split: {split}")
    img_dir = os.path.join(dataset_root, "images", split)
    lbl_dir = os.path.join(dataset_root, "labels", split)

    # Get all files
    files_by_cat = {cat: [] for cat in categories}
    for fname in os.listdir(img_dir):
        stem, ext = os.path.splitext(fname)
        if ext.lower() not in image_exts:
            continue
        norm_stem = normalize(stem)

        for norm_cat, orig_cat in norm_cat_map.items():
            if norm_stem.startswith(norm_cat):
                files_by_cat[orig_cat].append(stem)
                break

    # Trim files
    for cat, files in files_by_cat.items():
        if len(files) <= keep_count:
            continue  # already within limit

        random.seed(42)
        random.shuffle(files)

        keep_files = set(files[:keep_count])
        remove_files = set(files[keep_count:])

        for stem in remove_files:
            # Delete image
            for ext in image_exts:
                img_path = os.path.join(img_dir, stem + ext)
                if os.path.exists(img_path):
                    os.remove(img_path)
                    print(f"Deleted image: {img_path}")
                    break

            # Delete label
            label_path = os.path.join(lbl_dir, stem + ".txt")
            if os.path.exists(label_path):
                os.remove(label_path)
                print(f"Deleted label: {label_path}")

    print(f"✅ Trimmed {split} to {keep_count} per class.")



🔹 Trimming split: train
Deleted image: /Users/ramana/k/dataset-2/images/train/Aloo_matar574.jpg
Deleted label: /Users/ramana/k/dataset-2/labels/train/Aloo_matar574.txt
Deleted image: /Users/ramana/k/dataset-2/images/train/Aloo_matar243.jpg
Deleted label: /Users/ramana/k/dataset-2/labels/train/Aloo_matar243.txt
Deleted image: /Users/ramana/k/dataset-2/images/train/Aloo_matar239.jpg
Deleted label: /Users/ramana/k/dataset-2/labels/train/Aloo_matar239.txt
Deleted image: /Users/ramana/k/dataset-2/images/train/Aloo_matar65.jpg
Deleted label: /Users/ramana/k/dataset-2/labels/train/Aloo_matar65.txt
Deleted image: /Users/ramana/k/dataset-2/images/train/Aloo_matar115.jpg
Deleted label: /Users/ramana/k/dataset-2/labels/train/Aloo_matar115.txt
Deleted image: /Users/ramana/k/dataset-2/images/train/Aloo_matar427.jpg
Deleted label: /Users/ramana/k/dataset-2/labels/train/Aloo_matar427.txt
Deleted image: /Users/ramana/k/dataset-2/images/train/Aloo_matar733.jpg
Deleted label: /Users/ramana/k/dataset-2/

In [1]:
import os
import cv2

# Base directories
base_image_dir = "/Users/ramana/k/dataset-full/images"
base_label_dir = "/Users/ramana/k/dataset-full/labels"
base_output_dir = "/Users/ramana/k/output_with_boxes"

# Splits to process
splits = ["train", "val", "test"]

# Optional: define class names (replace with your actual class list)
class_names = ["class0", "class1", "class2"]  # Modify as needed

for split in splits:
    image_dir = os.path.join(base_image_dir, split)
    label_dir = os.path.join(base_label_dir, split)
    output_dir = os.path.join(base_output_dir, split)

    os.makedirs(output_dir, exist_ok=True)

    print(f"Processing split: {split}")

    for fname in os.listdir(image_dir):
        if not fname.lower().endswith((".jpg", ".jpeg", ".png")):
            continue

        image_path = os.path.join(image_dir, fname)
        label_path = os.path.join(label_dir, os.path.splitext(fname)[0] + ".txt")

        if not os.path.exists(label_path):
            continue

        # Load image
        image = cv2.imread(image_path)
        if image is None:
            print(f"Could not read image: {image_path}")
            continue

        height, width = image.shape[:2]

        # Read YOLO label file
        with open(label_path, "r") as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) != 5:
                    continue  # skip invalid entries

                class_id, x_center, y_center, box_width, box_height = map(float, parts)
                class_id = int(class_id)

                # Convert YOLO to pixel coordinates
                xc = x_center * width
                yc = y_center * height
                w = box_width * width
                h = box_height * height

                x1 = int(xc - w / 2)
                y1 = int(yc - h / 2)
                x2 = int(xc + w / 2)
                y2 = int(yc + h / 2)

                # Draw box
                cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

                # Draw label
                label = class_names[class_id] if class_id < len(class_names) else f"class_{class_id}"
                cv2.putText(image, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

        # Save output
        out_path = os.path.join(output_dir, fname)
        cv2.imwrite(out_path, image)

    print(f"Completed split: {split}\n")

print("All images processed and saved with bounding boxes.")


Processing split: train
Completed split: train

Processing split: val
Completed split: val

Processing split: test
Completed split: test

All images processed and saved with bounding boxes.


In [3]:
import os

# Define paths
dataset_root = "/Users/ramana/k/dataset-full"
splits = ["train", "val", "test"]

for split in splits:
    print(f"Checking empty labels in {split} split...")

    labels_dir = os.path.join(dataset_root, "labels", split)
    images_dir = os.path.join(dataset_root, "images", split)

    for label_file in os.listdir(labels_dir):
        label_path = os.path.join(labels_dir, label_file)

        # Check if label file is empty
        if os.path.getsize(label_path) == 0:
            # Corresponding image filename
            image_stem = os.path.splitext(label_file)[0]

            # Check common image extensions
            for ext in [".jpg", ".jpeg", ".png"]:
                image_path = os.path.join(images_dir, image_stem + ext)
                if os.path.exists(image_path):
                    os.remove(image_path)
                    print(f"Deleted image: {image_path}")
                    break

            # Remove the empty label file
            os.remove(label_path)
            print(f"Deleted empty label: {label_path}")

    print(f"Completed cleaning {split} split.\n")

print("Finished cleaning all splits.")


Checking empty labels in train split...
Deleted image: /Users/ramana/k/dataset-full/images/train/Pav_bhaji647.jpg
Deleted empty label: /Users/ramana/k/dataset-full/labels/train/Pav_bhaji647.txt
Deleted image: /Users/ramana/k/dataset-full/images/train/Dahl688.jpg
Deleted empty label: /Users/ramana/k/dataset-full/labels/train/Dahl688.txt
Deleted image: /Users/ramana/k/dataset-full/images/train/Idli134.jpg
Deleted empty label: /Users/ramana/k/dataset-full/labels/train/Idli134.txt
Deleted image: /Users/ramana/k/dataset-full/images/train/Dahl111.jpg
Deleted empty label: /Users/ramana/k/dataset-full/labels/train/Dahl111.txt
Deleted image: /Users/ramana/k/dataset-full/images/train/Gulab_jamun358.jpg
Deleted empty label: /Users/ramana/k/dataset-full/labels/train/Gulab_jamun358.txt
Deleted image: /Users/ramana/k/dataset-full/images/train/Aloo_matar159.jpg
Deleted empty label: /Users/ramana/k/dataset-full/labels/train/Aloo_matar159.txt
Deleted image: /Users/ramana/k/dataset-full/images/train/Idl

In [4]:
import os
import cv2

# Base directories
base_image_dir = "/Users/ramana/k/dataset-full/images"
base_label_dir = "/Users/ramana/k/dataset-full/labels"
base_output_dir = "/Users/ramana/k/output_with_boxes"

# Splits to process
splits = ["train", "val", "test"]

# Optional: define class names (replace with your actual class list)
class_names = ["class0", "class1", "class2"]  # Modify as needed

for split in splits:
    image_dir = os.path.join(base_image_dir, split)
    label_dir = os.path.join(base_label_dir, split)
    output_dir = os.path.join(base_output_dir, split)

    os.makedirs(output_dir, exist_ok=True)

    print(f"Processing split: {split}")

    for fname in os.listdir(image_dir):
        if not fname.lower().endswith((".jpg", ".jpeg", ".png")):
            continue

        image_path = os.path.join(image_dir, fname)
        label_path = os.path.join(label_dir, os.path.splitext(fname)[0] + ".txt")

        if not os.path.exists(label_path):
            continue

        # Load image
        image = cv2.imread(image_path)
        if image is None:
            print(f"Could not read image: {image_path}")
            continue

        height, width = image.shape[:2]

        # Read YOLO label file
        with open(label_path, "r") as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) != 5:
                    continue  # skip invalid entries

                class_id, x_center, y_center, box_width, box_height = map(float, parts)
                class_id = int(class_id)

                # Convert YOLO to pixel coordinates
                xc = x_center * width
                yc = y_center * height
                w = box_width * width
                h = box_height * height

                x1 = int(xc - w / 2)
                y1 = int(yc - h / 2)
                x2 = int(xc + w / 2)
                y2 = int(yc + h / 2)

                # Draw box
                cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

                # Draw label
                label = class_names[class_id] if class_id < len(class_names) else f"class_{class_id}"
                cv2.putText(image, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

        # Save output
        out_path = os.path.join(output_dir, fname)
        cv2.imwrite(out_path, image)

    print(f"Completed split: {split}\n")

print("All images processed and saved with bounding boxes.")


Processing split: train
Completed split: train

Processing split: val
Completed split: val

Processing split: test
Completed split: test

All images processed and saved with bounding boxes.
