## SIPEC to YOLO conversion

In [None]:
import json
import os


via_json_path = "D:/DeepSORT_ML2025/2021-10-11_3/2021-10-11_3/3_via_project_14Nov2021_10h9m.json"
output_dir = 'D:/DeepSORT_ML2025/2021-10-11_3/2021-10-11_3/label'
image_width = 1640   # frame width in pixels, used to normalise x coordinates
image_height = 1232  # frame height in pixels, used to normalise y coordinates
class_id = 0         # every polygon is written with this single class id


def polygon_to_yolo_line(all_x, all_y, img_w, img_h, cls=0):
    """Turn polygon vertices into one YOLO ``class xc yc w h`` label line.

    The axis-aligned bounding box of the polygon is clamped to the image
    rectangle and then normalised by the image size.

    Args:
        all_x: x coordinates of the polygon vertices, in pixels.
        all_y: y coordinates of the polygon vertices, in pixels.
        img_w: image width in pixels.
        img_h: image height in pixels.
        cls: class id written at the start of the line.

    Returns:
        The formatted label line, or ``None`` when a coordinate list is empty
        or the clamped box has no area.
    """
    if not all_x or not all_y:
        return None

    # Clamp so that polygons drawn past the frame edge still give values in [0, 1].
    xmin = max(0.0, min(all_x))
    xmax = min(float(img_w), max(all_x))
    ymin = max(0.0, min(all_y))
    ymax = min(float(img_h), max(all_y))
    if xmax <= xmin or ymax <= ymin:
        return None

    x_center = (xmin + xmax) / 2.0 / img_w
    y_center = (ymin + ymax) / 2.0 / img_h
    width = (xmax - xmin) / img_w
    height = (ymax - ymin) / img_h
    return f"{cls} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}"


def convert_via_to_yolo(json_path, out_dir, img_w, img_h, cls=0):
    """Write one YOLO label file per image listed in a VIA annotation JSON.

    Both a plain annotation export (image records at the top level) and a
    project save (image records nested under ``_via_img_metadata``) are
    accepted, and ``regions`` may be either a list or a dict of regions.
    Only regions whose shape name is ``polygon`` are converted. An image
    without usable polygons still gets an (empty) label file.

    Args:
        json_path: path of the VIA JSON file.
        out_dir: folder receiving the ``<image stem>.txt`` files (created if missing).
        img_w: image width in pixels.
        img_h: image height in pixels.
        cls: class id written for every box.

    Returns:
        Number of label files written.
    """
    os.makedirs(out_dir, exist_ok=True)

    with open(json_path, 'r', encoding='latin-1') as f:
        data = json.load(f)

    # A project save wraps the per-image records; unwrap it when present.
    if isinstance(data, dict) and '_via_img_metadata' in data:
        data = data['_via_img_metadata']

    written = 0
    for img_data in data.values():
        if not isinstance(img_data, dict) or 'filename' not in img_data:
            continue  # not an image record (e.g. a settings entry)

        regions = img_data.get('regions', [])
        if isinstance(regions, dict):
            regions = list(regions.values())

        yolo_lines = []
        for region in regions:
            shape = region.get('shape_attributes', {})
            if shape.get('name') != 'polygon':
                continue
            line = polygon_to_yolo_line(
                shape.get('all_points_x', []),
                shape.get('all_points_y', []),
                img_w, img_h, cls,
            )
            if line is not None:
                yolo_lines.append(line)

        # basename() keeps a path or URL in "filename" from leaking into the output path.
        base_name = os.path.splitext(os.path.basename(img_data['filename']))[0]
        out_path = os.path.join(out_dir, f"{base_name}.txt")
        with open(out_path, 'w') as out_f:
            out_f.write('\n'.join(yolo_lines))
        written += 1

    return written


if __name__ == '__main__':
    convert_via_to_yolo(via_json_path, output_dir, image_width, image_height, class_id)
    print("YOLO label conversion complete.")


## Check SIPEC to YOLO conversion

In [None]:
import os
import cv2


image_folder = 'd:/DeepSORT_ML2025/2021-10-11_3/2021-10-11_1'
label_folder = 'd:/DeepSORT_ML2025/2021-10-11_3/2021-10-11_1/label'
output_folder = 'output_images_with_boxes'
image_extensions = ('.png', '.jpg', '.jpeg')  # tried in this order for each label file


def yolo_to_pixel_box(x_center, y_center, width, height, img_w, img_h):
    """Convert a normalised YOLO box to integer pixel corners.

    Args:
        x_center, y_center, width, height: normalised YOLO box values.
        img_w: width in pixels of the image the box belongs to.
        img_h: height in pixels of the image the box belongs to.

    Returns:
        ``(x1, y1, x2, y2)`` — top-left and bottom-right corners, rounded to
        the nearest pixel.
    """
    cx = x_center * img_w
    cy = y_center * img_h
    half_w = width * img_w / 2.0
    half_h = height * img_h / 2.0
    return (
        int(round(cx - half_w)),
        int(round(cy - half_h)),
        int(round(cx + half_w)),
        int(round(cy + half_h)),
    )


def find_image_for_label(image_dir, base_name, extensions=image_extensions):
    """Return the path of the first existing ``base_name + ext`` in ``image_dir``, else ``None``."""
    for ext in extensions:
        candidate = os.path.join(image_dir, base_name + ext)
        if os.path.exists(candidate):
            return candidate
    return None


def draw_label_boxes(image_dir, label_dir, out_dir, extensions=image_extensions):
    """Draw every YOLO box from ``label_dir`` onto its image and save the result.

    For each ``.txt`` file the matching image is looked up in ``image_dir``;
    boxes are scaled with the size of the image actually loaded, so the
    overlay stays correct for frames of any resolution. Label lines that do
    not have exactly five numeric fields are ignored.

    Args:
        image_dir: folder containing the images.
        label_dir: folder containing the YOLO ``.txt`` files.
        out_dir: folder receiving ``<stem>_boxed.png`` (created if missing).
        extensions: image extensions to try for each label file.

    Returns:
        Number of annotated images written.
    """
    os.makedirs(out_dir, exist_ok=True)
    written = 0

    for label_file in sorted(os.listdir(label_dir)):
        if not label_file.endswith('.txt'):
            continue

        base_name = os.path.splitext(label_file)[0]
        label_path = os.path.join(label_dir, label_file)

        image_path = find_image_for_label(image_dir, base_name, extensions)
        if image_path is None:
            print(f"Image not found for {label_file}, skipping...")
            continue

        img = cv2.imread(image_path)
        if img is None:
            print(f"Could not read image {image_path}, skipping...")
            continue

        # Scale with the real frame size rather than an assumed resolution.
        img_h, img_w = img.shape[:2]

        with open(label_path, 'r') as f:
            for line in f:
                parts = line.split()
                if len(parts) != 5:
                    continue
                try:
                    _, x_center, y_center, width, height = map(float, parts)
                except ValueError:
                    continue  # non-numeric field: skip the line, keep the image
                x1, y1, x2, y2 = yolo_to_pixel_box(
                    x_center, y_center, width, height, img_w, img_h
                )
                cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)

        out_path = os.path.join(out_dir, base_name + '_boxed.png')
        cv2.imwrite(out_path, img)
        written += 1

    return written


if __name__ == '__main__':
    draw_label_boxes(image_folder, label_folder, output_folder)


In [None]:
import os


folder_path = "D:/DeepSORT_ML2025/Scripts/marmoset_dataset/images"
prefix = 'JM_ind1_cam10_post_'


def add_prefix_to_files(folder, name_prefix):
    """Prepend ``name_prefix`` to every regular file directly inside ``folder``.

    Files that already start with the prefix are left alone, so running the
    cell again does not stack the prefix a second time. A file is also left
    alone when the prefixed name is already taken, instead of overwriting it.
    Sub-directories are never renamed.

    Args:
        folder: directory whose files are renamed.
        name_prefix: text to put in front of each file name.

    Returns:
        Number of files renamed.
    """
    renamed = 0
    for filename in sorted(os.listdir(folder)):
        old_path = os.path.join(folder, filename)
        if not os.path.isfile(old_path):
            continue
        if filename.startswith(name_prefix):
            continue  # already prefixed by an earlier run

        new_filename = name_prefix + filename
        new_path = os.path.join(folder, new_filename)
        if os.path.exists(new_path):
            print(f"Skipping {filename}: {new_filename} already exists.")
            continue

        os.rename(old_path, new_path)
        renamed += 1
    return renamed


if __name__ == '__main__':
    add_prefix_to_files(folder_path, prefix)
    print("Renaming complete.")

Renaming complete.


## Remove excess images

In [6]:
import os


images_folder = r"D:\DeepSORT_ML2025\video_stitch\new\extracted1"
labels_folder = r"D:\DeepSORT_ML2025\video_stitch\new\label"


def remove_unlabeled_images(images_dir, labels_dir, dry_run=False):
    """Delete images in ``images_dir`` that have no ``<stem>.txt`` in ``labels_dir``.

    Because this permanently deletes files, the labels folder is validated
    first: if it is missing or holds no ``.txt`` file at all, nothing is
    deleted and an error is raised (otherwise a mistyped path would wipe the
    whole image folder).

    Args:
        images_dir: folder with ``.jpg`` / ``.jpeg`` / ``.png`` images.
        labels_dir: folder with the YOLO ``.txt`` files.
        dry_run: when true, only report what would be deleted.

    Returns:
        ``(kept, removed)`` — the number of images that have a label and the
        list of image file names that were (or would be) deleted.

    Raises:
        NotADirectoryError: ``labels_dir`` is not an existing directory.
        ValueError: ``labels_dir`` contains no ``.txt`` file.
    """
    if not os.path.isdir(labels_dir):
        raise NotADirectoryError(f"Labels folder not found: {labels_dir}")
    if not any(name.lower().endswith('.txt') for name in os.listdir(labels_dir)):
        raise ValueError(f"No .txt label files in {labels_dir}; refusing to delete images.")

    kept = 0
    removed = []
    for image_file in sorted(os.listdir(images_dir)):
        if not image_file.lower().endswith(('.jpg', '.jpeg', '.png')):
            continue

        base_name = os.path.splitext(image_file)[0]
        label_path = os.path.join(labels_dir, base_name + '.txt')
        image_path = os.path.join(images_dir, image_file)

        if os.path.exists(label_path):
            kept += 1
        elif dry_run:
            removed.append(image_file)
            print(f"Would delete unmatched image: {image_file}")
        else:
            os.remove(image_path)
            removed.append(image_file)
            print(f"Deleted unmatched image: {image_file}")

    return kept, removed


if __name__ == '__main__':
    valid_pairs, _ = remove_unlabeled_images(images_folder, labels_folder)
    print(f"\nTotal matched image-label pairs kept: {valid_pairs}")


Deleted unmatched image: NuNo_rew2_cam11_pre_frame_0000.jpg
Deleted unmatched image: NuNo_rew2_cam11_pre_frame_0004.jpg
Deleted unmatched image: NuNo_rew2_cam11_pre_frame_0005.jpg
Deleted unmatched image: NuNo_rew2_cam11_pre_frame_0007.jpg
Deleted unmatched image: NuNo_rew2_cam11_pre_frame_0008.jpg
Deleted unmatched image: NuNo_rew2_cam11_pre_frame_0009.jpg
Deleted unmatched image: NuNo_rew2_cam11_pre_frame_0010.jpg
Deleted unmatched image: NuNo_rew2_cam11_pre_frame_0011.jpg
Deleted unmatched image: NuNo_rew2_cam11_pre_frame_0012.jpg
Deleted unmatched image: NuNo_rew2_cam11_pre_frame_0015.jpg
Deleted unmatched image: NuNo_rew2_cam11_pre_frame_0016.jpg
Deleted unmatched image: NuNo_rew2_cam11_pre_frame_0017.jpg
Deleted unmatched image: NuNo_rew2_cam11_pre_frame_0019.jpg
Deleted unmatched image: NuNo_rew2_cam11_pre_frame_0021.jpg
Deleted unmatched image: NuNo_rew2_cam11_pre_frame_0024.jpg
Deleted unmatched image: NuNo_rew2_cam11_pre_frame_0025.jpg
Deleted unmatched image: NuNo_rew2_cam11

## Remove excess label files

In [4]:
import os


images_folder = "D:/DeepSORT_ML2025/yolo_project/dataset/images/train"
labels_folder = "D:/DeepSORT_ML2025/yolo_project/dataset/labels/train"
image_extensions = ['.jpg', '.jpeg', '.png']


def remove_orphan_labels(images_dir, labels_dir, extensions):
    """Delete ``.txt`` files in ``labels_dir`` that have no image with the same stem.

    Because this permanently deletes files, the function refuses to run when
    ``images_dir`` contains no file with one of ``extensions`` (otherwise a
    mistyped path would wipe every label).

    Args:
        images_dir: folder holding the images.
        labels_dir: folder holding the YOLO ``.txt`` files.
        extensions: image extensions to recognise (compared case-insensitively).

    Returns:
        List of the label file names that were deleted.

    Raises:
        ValueError: no image with a recognised extension exists in ``images_dir``.
    """
    wanted = {ext.lower() for ext in extensions}
    image_basenames = {
        os.path.splitext(f)[0]
        for f in os.listdir(images_dir)
        if os.path.splitext(f)[1].lower() in wanted
    }
    if not image_basenames:
        raise ValueError(f"No images found in {images_dir}; refusing to delete labels.")

    deleted = []
    for label_file in sorted(os.listdir(labels_dir)):
        if not label_file.endswith('.txt'):
            continue

        base_name = os.path.splitext(label_file)[0]
        if base_name not in image_basenames:
            os.remove(os.path.join(labels_dir, label_file))
            print(f"🗑️ Deleted: {label_file}")
            deleted.append(label_file)

    return deleted


if __name__ == '__main__':
    deleted_count = len(remove_orphan_labels(images_folder, labels_folder, image_extensions))
    print(f"\n✅ Cleanup complete. Total deleted label files: {deleted_count}")


🗑️ Deleted: frame_0011.txt
🗑️ Deleted: frame_0012.txt
🗑️ Deleted: frame_0013.txt
🗑️ Deleted: frame_0030.txt
🗑️ Deleted: frame_0034.txt
🗑️ Deleted: frame_0043.txt
🗑️ Deleted: frame_0045.txt
🗑️ Deleted: frame_0049.txt
🗑️ Deleted: frame_0066.txt
🗑️ Deleted: frame_0075.txt
🗑️ Deleted: frame_0076.txt
🗑️ Deleted: frame_0105.txt
🗑️ Deleted: frame_0106.txt

✅ Cleanup complete. Total deleted label files: 13


## Split Dataset into Train, Val and Test

In [6]:
import os
import shutil
import random

def list_files_no_ext(folder, exts):
    """Collect the extension-less names of the entries in ``folder`` that match ``exts``.

    Args:
        folder: directory to list.
        exts: container of lower-case extensions including the dot (e.g. ``{'.txt'}``);
            each entry's extension is lower-cased before the membership test.

    Returns:
        A set of names with the extension stripped; an empty set when
        ``folder`` does not exist.
    """
    if os.path.exists(folder):
        return {
            stem
            for stem, suffix in map(os.path.splitext, os.listdir(folder))
            if suffix.lower() in exts
        }
    return set()

def count_images_labels(dataset_dir):
    """Count images and label files in each split of a YOLO-style dataset folder.

    Looks at ``<dataset_dir>/images/<split>`` and ``<dataset_dir>/labels/<split>``
    for the splits ``train``, ``val`` and ``test``. Counts are numbers of
    distinct extension-less names, as returned by ``list_files_no_ext``.

    Returns:
        ``{split: {'images': int, 'labels': int}}`` for the three splits.
    """
    image_suffixes = {'.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tiff'}
    label_suffixes = {'.txt'}

    def tally(kind, split, suffixes):
        # Number of distinct stems under <dataset_dir>/<kind>/<split>.
        return len(list_files_no_ext(os.path.join(dataset_dir, kind, split), suffixes))

    return {
        split: {
            'images': tally('images', split, image_suffixes),
            'labels': tally('labels', split, label_suffixes),
        }
        for split in ('train', 'val', 'test')
    }

def move_file(src, dst):
    """Move ``src`` to ``dst``, creating the destination's parent folder if needed.

    Args:
        src: path of the file to move.
        dst: full destination path (including the file name).
    """
    parent = os.path.dirname(dst)
    # A bare file name has no parent component; os.makedirs('') would raise.
    if parent:
        os.makedirs(parent, exist_ok=True)
    shutil.move(src, dst)

def ensure_dir_structure(dataset_dir):
    """Create ``images/<split>`` and ``labels/<split>`` under ``dataset_dir``.

    The splits are ``train``, ``val`` and ``test``; folders that already
    exist are left untouched.
    """
    for split in ('train', 'val', 'test'):
        for kind in ('images', 'labels'):
            os.makedirs(os.path.join(dataset_dir, kind, split), exist_ok=True)

def combine_all_images(dataset_dir, img_exts):
    """List the distinct image stems found across the train/val/test image folders.

    Args:
        dataset_dir: dataset root containing ``images/<split>`` folders.
        img_exts: container of lower-case image extensions including the dot.

    Returns:
        A sorted list of extension-less image names. Sorting matters: the
        iteration order of a set of strings can change from one interpreter
        session to the next, so an unsorted list would make any seeded
        shuffle of the result non-reproducible.
    """
    all_images = set()
    for split in ['train', 'val', 'test']:
        img_dir = os.path.join(dataset_dir, 'images', split)
        all_images.update(list_files_no_ext(img_dir, img_exts))
    return sorted(all_images)

def move_image_label_pair(basename, src_img_dir, dst_img_dir, src_lbl_dir, dst_lbl_dir, img_exts):
    """Move the image(s) and the label file named ``basename`` to new folders.

    Every image in ``src_img_dir`` whose stem is ``basename`` and whose
    extension (compared case-insensitively) is in ``img_exts`` is moved, so
    ``foo.JPG`` is found on case-sensitive filesystems and two images that
    share a stem both travel with their label.

    Args:
        basename: file name without extension.
        src_img_dir: folder currently holding the image.
        dst_img_dir: folder the image is moved to.
        src_lbl_dir: folder currently holding ``<basename>.txt``.
        dst_lbl_dir: folder the label is moved to.
        img_exts: container of image extensions including the dot.

    Returns:
        ``True`` if at least one image was moved, else ``False``.
    """
    wanted_exts = {ext.lower() for ext in img_exts}
    moved_any = False

    # Cheap probes first: the lower- and upper-case spelling of each extension.
    for ext in sorted(wanted_exts):
        for variant in (ext, ext.upper()):
            src_img = os.path.join(src_img_dir, basename + variant)
            # Re-checked per variant: on a case-insensitive filesystem both
            # spellings name the same file, which is gone after the first move.
            if os.path.exists(src_img):
                move_file(src_img, os.path.join(dst_img_dir, basename + variant))
                moved_any = True

    # Fallback for mixed-case extensions such as ".Jpg": scan the folder once.
    if not moved_any and os.path.isdir(src_img_dir):
        for name in os.listdir(src_img_dir):
            stem, ext = os.path.splitext(name)
            if stem == basename and ext.lower() in wanted_exts:
                move_file(os.path.join(src_img_dir, name), os.path.join(dst_img_dir, name))
                moved_any = True

    src_lbl = os.path.join(src_lbl_dir, basename + '.txt')
    if os.path.exists(src_lbl):
        move_file(src_lbl, os.path.join(dst_lbl_dir, basename + '.txt'))

    return moved_any

def adjust_split(dataset_dir, train_ratio=0.7, val_ratio=0.15, seed=42):
    """Re-shuffle all images of a dataset into train / val / test splits.

    All images (and their ``.txt`` labels) found under the three split
    folders are pooled into temporary ``images/all`` and ``labels/all``
    folders, shuffled with ``seed`` and redistributed. The test split
    receives whatever remains after the train and val shares.

    Files that end up in no split (for example a label without an image) are
    NOT deleted: they stay in the ``all`` folder and a warning is printed.

    Args:
        dataset_dir: dataset root containing ``images/`` and ``labels/``.
        train_ratio: fraction of images assigned to train.
        val_ratio: fraction of images assigned to val.
        seed: seed of the shuffle; the same seed and the same set of file
            names always give the same split.

    Raises:
        ValueError: a ratio is negative or the two ratios sum to more than 1.
        shutil.Error: two splits contain a file with the same name.
    """
    if train_ratio < 0 or val_ratio < 0 or train_ratio + val_ratio > 1 + 1e-9:
        raise ValueError("train_ratio and val_ratio must be >= 0 and sum to at most 1")

    img_exts = {'.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tiff'}
    img_suffixes = tuple(img_exts)
    splits = ('train', 'val', 'test')

    ensure_dir_structure(dataset_dir)

    # Sort before shuffling: the incoming order may derive from a set, whose
    # order is not stable across sessions, which would defeat the seed.
    all_basenames = sorted(combine_all_images(dataset_dir, img_exts))
    # A private generator keeps the global random state untouched.
    random.Random(seed).shuffle(all_basenames)

    total = len(all_basenames)
    num_train = int(total * train_ratio)
    num_val = int(total * val_ratio)

    members = {
        'train': all_basenames[:num_train],
        'val': all_basenames[num_train:num_train + num_val],
        'test': all_basenames[num_train + num_val:],
    }

    print(f"Total: {total} | Train: {len(set(members['train']))} | "
          f"Val: {len(set(members['val']))} | Test: {len(set(members['test']))}")

    # Pool everything into the staging folders first.
    tmp_img = os.path.join(dataset_dir, 'images', 'all')
    tmp_lbl = os.path.join(dataset_dir, 'labels', 'all')
    os.makedirs(tmp_img, exist_ok=True)
    os.makedirs(tmp_lbl, exist_ok=True)

    for split in splits:
        src_img_dir = os.path.join(dataset_dir, 'images', split)
        for file in os.listdir(src_img_dir):
            if file.lower().endswith(img_suffixes):
                shutil.move(os.path.join(src_img_dir, file), tmp_img)
        src_lbl_dir = os.path.join(dataset_dir, 'labels', split)
        for file in os.listdir(src_lbl_dir):
            if file.endswith('.txt'):
                shutil.move(os.path.join(src_lbl_dir, file), tmp_lbl)

    # Hand the pooled files out to their new splits.
    for split in splits:
        dst_img_dir = os.path.join(dataset_dir, 'images', split)
        dst_lbl_dir = os.path.join(dataset_dir, 'labels', split)
        for basename in members[split]:
            move_image_label_pair(basename, tmp_img, dst_img_dir,
                                  tmp_lbl, dst_lbl_dir, img_exts)

    # Remove a staging folder only when it is empty, so nothing is lost.
    for staging_dir in (tmp_img, tmp_lbl):
        leftovers = os.listdir(staging_dir)
        if leftovers:
            print(f"⚠️ {len(leftovers)} unassigned file(s) kept in {staging_dir}")
        else:
            os.rmdir(staging_dir)

    final_counts = count_images_labels(dataset_dir)
    print("✅ Final counts after split:")
    for k, v in final_counts.items():
        print(f"{k}: {v}")

if __name__ == '__main__':
    # Interactive entry point: ask for the dataset root, show the per-split
    # counts as they are now, then redistribute the files.
    user_entry = input("Enter dataset directory path: ")
    dataset_dir = user_entry.strip()

    print("📊 Counts before adjustment:")
    counts_before = count_images_labels(dataset_dir)
    print(counts_before)

    adjust_split(dataset_dir)


📊 Counts before adjustment:
{'train': {'images': 667, 'labels': 667}, 'val': {'images': 167, 'labels': 167}, 'test': {'images': 0, 'labels': 0}}
Total: 834 | Train: 583 | Val: 125 | Test: 126
✅ Final counts after split:
train: {'images': 583, 'labels': 583}
val: {'images': 125, 'labels': 125}
test: {'images': 126, 'labels': 126}


## Convert label .txt files from 2 classes to 1

In [None]:
import os


labels_base_dir = r"D:\DeepSORT_ML2025\yolo_project\dataset\labels"


def collapse_labels_to_single_class(base_dir, splits=('train', 'val', 'test'), new_class='0'):
    """Rewrite every YOLO label file so that all boxes carry ``new_class``.

    Each ``.txt`` file under ``<base_dir>/<split>`` is rewritten in place:
    the first field of every line with at least five fields is replaced by
    ``new_class``; lines with fewer fields (including blank lines) are
    dropped. The ``test`` split is included so it does not keep the old
    class ids; a split folder that does not exist is skipped with a note.

    Args:
        base_dir: the dataset's ``labels`` folder.
        splits: names of the split sub-folders to process.
        new_class: class id (as text) written into every kept line.

    Returns:
        Number of label files rewritten.
    """
    updated = 0
    for split in splits:
        split_dir = os.path.join(base_dir, split)
        if not os.path.isdir(split_dir):
            print(f"Skipping missing split folder: {split_dir}")
            continue

        for fname in sorted(os.listdir(split_dir)):
            if not fname.endswith('.txt'):
                continue
            file_path = os.path.join(split_dir, fname)

            with open(file_path, 'r') as f:
                lines = f.readlines()

            new_lines = []
            for line in lines:
                parts = line.split()
                if len(parts) >= 5:
                    parts[0] = new_class
                    new_lines.append(' '.join(parts))

            with open(file_path, 'w') as f:
                f.write('\n'.join(new_lines))
            updated += 1

    return updated


if __name__ == '__main__':
    collapse_labels_to_single_class(labels_base_dir)
    print("✅ All label files updated to single class.")


✅ All label files updated to single class.
