In [1]:
pip install albumentations opencv-python tqdm

Collecting numpy>=1.24.4
  Using cached numpy-1.24.4-cp38-cp38-win_amd64.whl (14.9 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 1.24.3
    Uninstalling numpy-1.24.3:
      Successfully uninstalled numpy-1.24.3
Note: you may need to restart the kernel to use updated packages.


ERROR: Could not install packages due to an OSError: [WinError 5] Access is denied: 'D:\\Python 3.8 file\\Lib\\site-packages\\~umpy\\.libs\\libopenblas64__v0.3.21-gcc_10_3_0.dll'
Consider using the `--user` option or check the permissions.

You should consider upgrading via the 'd:\python 3.8 file\python.exe -m pip install --upgrade pip' command.


In [5]:
import os
from collections import defaultdict
import random

random.seed(42)  # for reproducibility

# Combined (un-split) dataset: images and their label files
image_dir = "indian_coin/all/images"
label_dir = "indian_coin/all/labels"

# Root under which the per-split folders are written
base_output_dir = "indian_coin"
splits = ['train', 'val', 'test']

# Mapping of class_id → list of image filenames whose label file mentions that class
class_to_images = defaultdict(list)

# Sort the listing: os.listdir() returns entries in arbitrary order, and a seeded
# shuffle is only reproducible when its input arrives in a stable order.
label_files = sorted(f for f in os.listdir(label_dir) if f.endswith('.txt'))

for label_file in label_files:
    label_path = os.path.join(label_dir, label_file)
    with open(label_path, 'r') as f:
        # The first whitespace-separated field of each row is the class id.
        # Blank / whitespace-only rows are skipped; they would otherwise raise
        # IndexError on `split()[0]`.
        classes_in_file = {int(line.split()[0]) for line in f if line.strip()}

    # Swap only the final extension. str.replace() would also rewrite a ".txt"
    # that happens to occur earlier in the file name.
    image_filename = os.path.splitext(label_file)[0] + '.jpg'  # assumes .jpg images

    for class_id in sorted(classes_in_file):
        class_to_images[class_id].append(image_filename)

In [6]:
from shutil import copy2
from pathlib import Path

def make_dirs():
    """Ensure every split has an ``images`` and a ``labels`` folder.

    Reads the module-level ``splits`` and ``base_output_dir``; folders that
    already exist are left as they are.
    """
    for split_name in splits:
        split_root = os.path.join(base_output_dir, split_name)
        for subfolder in ("images", "labels"):
            os.makedirs(os.path.join(split_root, subfolder), exist_ok=True)

def copy_pair(image_name, split):
    """Copy an image and its label file from the combined set into a split.

    The label file name is the image file name with its final extension
    replaced by ``.txt``. Only the last extension is swapped, so names that
    contain ``.jpg`` elsewhere, or images with another extension such as
    ``.png``, resolve to the right label file.

    Args:
        image_name: File name of the image inside ``image_dir``.
        split: Name of the destination split folder under ``base_output_dir``.

    Returns:
        True if both files were copied, False if either source file is
        missing (in which case nothing is copied).
    """
    label_name = os.path.splitext(image_name)[0] + '.txt'

    src_img = os.path.join(image_dir, image_name)
    src_lbl = os.path.join(label_dir, label_name)

    # Copy only complete pairs so a split never holds an image without labels
    # (or labels without an image).
    if not (os.path.exists(src_img) and os.path.exists(src_lbl)):
        return False

    copy2(src_img, os.path.join(base_output_dir, split, "images", image_name))
    copy2(src_lbl, os.path.join(base_output_dir, split, "labels", label_name))
    return True

make_dirs()

# Re-seed locally so that re-running this cell reproduces the same assignment
# instead of continuing from wherever the global RNG was left.
random.seed(42)

# Images that have already been assigned to a split. An image that contains
# several classes is placed by the first class that reaches it and is then
# ignored for all later classes.
used_images = set()

# Visit classes in ascending id order so the outcome does not depend on the
# order in which keys were inserted into class_to_images.
for class_id, images in sorted(class_to_images.items()):
    # De-duplicate, then sort: iterating a set of strings yields an order that
    # changes between interpreter runs (string hash randomisation), which would
    # make the seeded shuffle below irreproducible.
    images = sorted(set(images))
    random.shuffle(images)

    # 70 % train, 15 % val, remainder (about 15 %) test — per class
    total = len(images)
    train_split = int(0.7 * total)
    val_split = int(0.15 * total)

    split_imgs = {
        'train': images[:train_split],
        'val': images[train_split:train_split + val_split],
        'test': images[train_split + val_split:]
    }

    for split, img_list in split_imgs.items():
        for img in img_list:
            if img not in used_images:
                copy_pair(img, split)
                used_images.add(img)

In [8]:
import os
from collections import defaultdict
import matplotlib.pyplot as plt

def count_classes_and_images(label_dir, num_classes):
    """Count labelled instances per class and label files in a directory.

    Every ``.txt`` file in ``label_dir`` is read; the first whitespace-separated
    field of each non-blank line is taken as the integer class id.

    Args:
        label_dir: Directory containing the ``.txt`` label files.
        num_classes: Number of classes to report (ids ``0 .. num_classes - 1``).

    Returns:
        A tuple ``(class_counts_list, total_images)`` where
        ``class_counts_list[i]`` is the number of lines labelled with class
        ``i`` and ``total_images`` is the number of ``.txt`` files found.
        Class ids outside ``range(num_classes)`` are not reported.

    Raises:
        ValueError: If the first field of a non-blank line is not an integer.
    """
    class_counts = defaultdict(int)
    label_files = [f for f in os.listdir(label_dir) if f.endswith('.txt')]

    for lbl_file in label_files:
        with open(os.path.join(label_dir, lbl_file), 'r') as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Blank or whitespace-only line: there is no class id to
                    # read, and indexing fields[0] would raise IndexError.
                    continue
                class_counts[int(fields[0])] += 1

    class_counts_list = [class_counts[i] for i in range(num_classes)]
    return class_counts_list, len(label_files)

# 📁 Label folders of the three splits
train_labels = os.path.join('indian_coin', 'train', 'labels')
val_labels = os.path.join('indian_coin', 'val', 'labels')
test_labels = os.path.join('indian_coin', 'test', 'labels')  # optional

# 🔢 Number of classes and class names
nc = 6  # change accordingly
class_names = [str(i) for i in range(nc)]  # or use actual class names if available

# 📊 Per-class instance counts and number of label files for each split
train_counts, train_total = count_classes_and_images(train_labels, nc)
val_counts, val_total = count_classes_and_images(val_labels, nc)
test_counts, test_total = count_classes_and_images(test_labels, nc)

# 🧮 Total images (one label file is counted as one image)
total_images = train_total + val_total + test_total


def _percent(part, whole):
    """Return ``part`` as a percentage of ``whole``; 0.0 when ``whole`` is 0."""
    # Guard against ZeroDivisionError when no label files were found at all.
    return (part / whole) * 100 if whole else 0.0


# 📌 Print class-wise instance counts
print("\n🔢 Class-wise instance counts:")
for i, name in enumerate(class_names):
    print(f"Class {i} ({name}): Train={train_counts[i]}, Val={val_counts[i]}, Test={test_counts[i]}")

# 📌 Print image totals and percentages
print("\n🧾 Image count summary:")
print(f"Train images: {train_total} ({_percent(train_total, total_images):.2f}%)")
print(f"Val images:   {val_total} ({_percent(val_total, total_images):.2f}%)")
print(f"Test images:  {test_total} ({_percent(test_total, total_images):.2f}%)")
print(f"Total images: {total_images}")


🔢 Class-wise instance counts:
Class 0 (0): Train=910, Val=195, Test=195
Class 1 (1): Train=1447, Val=320, Test=357
Class 2 (2): Train=875, Val=196, Test=202
Class 3 (3): Train=1390, Val=309, Test=320
Class 4 (4): Train=1083, Val=235, Test=229
Class 5 (5): Train=1581, Val=280, Test=325

🧾 Image count summary:
Train images: 2138 (68.81%)
Val images:   474 (15.26%)
Test images:  495 (15.93%)
Total images: 3107
