In [None]:
import os
import shutil
from sklearn.model_selection import train_test_split

In [None]:
# Paths
# Source folders: each is scanned for image files that have a same-named .txt
# label file sitting next to them. Bird pairs are later written with class id 0
# and drone pairs with class id 1.
# NOTE(review): the /content/drive/MyDrive prefix presumably requires Google
# Drive to be mounted before these cells run — confirm.
bird_dir = '/content/drive/MyDrive/datasetbird'  # update if needed
drone_dir = '/content/drive/MyDrive/dataset_txt'
# Root of the merged dataset; images/<split> and labels/<split> folders are
# created beneath it for the 'train' and 'val' splits.
base_path = '/content/yolov5/dataset'


In [None]:
# Create the images/ and labels/ folders for each split under base_path.
# exist_ok=True makes this cell safe to re-run.
for split in ('train', 'val'):
    for split_dir in (f'{base_path}/images/{split}', f'{base_path}/labels/{split}'):
        os.makedirs(split_dir, exist_ok=True)

In [None]:
def collect_pairs(src_dir):
    """Return the (image, label) filename pairs found directly inside ``src_dir``.

    An image is any directory entry whose name ends (case-insensitively) with
    ``.jpg``, ``.jpeg`` or ``.png``. It is paired with the entry named
    ``<image stem>.txt``; images without such a sibling are skipped.

    Args:
        src_dir: Directory to scan. Sub-directories are not descended into.

    Returns:
        A list of ``(image_name, label_name)`` tuples holding bare file names
        (no directory part), sorted by image name so the result does not
        depend on the arbitrary order in which ``os.listdir`` reports entries.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``src_dir`` cannot be read.
    """
    image_exts = ('.jpg', '.jpeg', '.png')
    # A set gives O(1) label lookups instead of rescanning the whole listing
    # once per image.
    entries = set(os.listdir(src_dir))
    pairs = []
    # Sorted iteration keeps any seeded split performed on the result
    # reproducible across machines and filesystems.
    for img in sorted(entries):
        if not img.lower().endswith(image_exts):
            continue
        label_file = os.path.splitext(img)[0] + '.txt'
        if label_file in entries:
            pairs.append((img, label_file))
    return pairs

In [None]:
# List the labelled images available for each class.
bird_pairs = collect_pairs(src_dir=bird_dir)
drone_pairs = collect_pairs(src_dir=drone_dir)

# Hold out 20% of each class for validation; the fixed random_state makes the
# shuffle repeatable for a given input order.
bird_train, bird_val = train_test_split(bird_pairs, random_state=42, test_size=0.2)
drone_train, drone_val = train_test_split(drone_pairs, random_state=42, test_size=0.2)

In [None]:
# Copy and relabel
def copy_and_relabel(pairs, src_dir, split, label_idx, dst_root=None):
    """Copy image/label pairs into a dataset split, forcing a single class id.

    Each image is copied unchanged to ``<dst_root>/images/<split>/``. Each
    label file is rewritten to ``<dst_root>/labels/<split>/``: for every line
    whose first whitespace-separated token consists only of digits, that token
    is replaced with ``label_idx`` and the remaining tokens are kept, joined by
    single spaces. Blank lines and lines whose first token is not all digits
    are dropped.

    Files are stored under their original names, so pairs coming from
    different source folders that share a file name overwrite one another
    within the same split.

    Args:
        pairs: Iterable of ``(image_name, label_name)`` bare file names.
        src_dir: Directory that contains the files named in ``pairs``.
        split: Name of the split sub-folder (the notebook uses 'train'/'val').
        label_idx: Class id written as the first token of every kept line.
        dst_root: Dataset root directory. Defaults to the notebook-level
            ``base_path`` when omitted.

    Raises:
        OSError: If a source file cannot be read or a destination cannot be
            written.
    """
    if dst_root is None:
        # Resolved at call time so a changed base_path in the paths cell is honoured.
        dst_root = base_path

    img_dir = os.path.join(dst_root, 'images', split)
    lbl_dir = os.path.join(dst_root, 'labels', split)
    # Create the targets here so the function does not depend on another cell
    # having been run first, and works for any split name.
    os.makedirs(img_dir, exist_ok=True)
    os.makedirs(lbl_dir, exist_ok=True)

    for img, lbl in pairs:
        shutil.copy(os.path.join(src_dir, img), os.path.join(img_dir, img))

        with open(os.path.join(src_dir, lbl), 'r') as f:
            lines = f.readlines()

        with open(os.path.join(lbl_dir, lbl), 'w') as f:
            for line in lines:
                parts = line.strip().split()
                # Only rows that start with a numeric class id are carried over.
                if parts and parts[0].isdigit():
                    parts[0] = str(label_idx)
                    f.write(' '.join(parts) + '\n')

In [None]:
# Birds are written with class id 0.
copy_and_relabel(pairs=bird_train, src_dir=bird_dir, split='train', label_idx=0)
copy_and_relabel(pairs=bird_val, src_dir=bird_dir, split='val', label_idx=0)

# Drones are written with class id 1.
copy_and_relabel(pairs=drone_train, src_dir=drone_dir, split='train', label_idx=1)
copy_and_relabel(pairs=drone_val, src_dir=drone_dir, split='val', label_idx=1)

print("✅ Dataset copied and relabeled!")