Making train-val splits

In [1]:
import os
import random
import shutil

# Define paths
source_folder = 'Datasets/HRSID_JPG/JPEGImages'
train_folder = 'data/images/train'
test_folder = 'data/images/val'  # the held-out split is written to the "val" folder

# Fraction of the images assigned to the training split; the remainder is held out.
split_ratio = 0.7
# Fixed seed so that re-running this cell reproduces exactly the same split.
split_seed = 42

# Create the train and val folders if they don't exist.
# NOTE: existing files in these folders are NOT removed. If they hold the result
# of an earlier run with a different shuffle, clear them first; otherwise the
# same image can end up in both splits.
os.makedirs(train_folder, exist_ok=True)
os.makedirs(test_folder, exist_ok=True)

# Get the list of image files. os.listdir() returns entries in arbitrary order,
# so sort first: the seeded shuffle below is only reproducible when it starts
# from the same ordering every time.
image_files = sorted(
    f for f in os.listdir(source_folder)
    if os.path.isfile(os.path.join(source_folder, f))
)
print(f"Total images: {len(image_files)}")

# Shuffle with a dedicated, seeded generator (leaves the global `random` state untouched).
random.Random(split_seed).shuffle(image_files)
print("Shuffled image files for randomness")

# Derive the split sizes (and the reported percentages) from split_ratio.
num_train = int(len(image_files) * split_ratio)
num_test = len(image_files) - num_train
train_percent = round(split_ratio * 100)
print(f"Train test ratio: {train_percent} - {100 - train_percent}")

# Copy (the source files are left in place) the first num_train images to the train folder
for image_file in image_files[:num_train]:
    source_path = os.path.join(source_folder, image_file)
    destination_path = os.path.join(train_folder, image_file)
    shutil.copy(source_path, destination_path)
print(f"Train images moved: {num_train}")

# Copy the remaining images to the held-out (val) folder
for image_file in image_files[num_train:]:
    source_path = os.path.join(source_folder, image_file)
    destination_path = os.path.join(test_folder, image_file)
    shutil.copy(source_path, destination_path)
print(f"Test images moved: {num_test}")


Total images: 5604
Shuffled image files for randomness
Train test ratio: 70 - 30
Train images moved: 3922
Test images moved: 1682


In [2]:
import os
import shutil

# Image folders of the two splits (inputs of this cell)
train_images_folder = 'data/images/train'
val_images_folder = 'data/images/val'

# Folder holding the annotation files for all images
all_annotations_folder = 'all_annotations'

# Per-split label folders (outputs of this cell)
train_labels_folder = 'data/labels/train'
val_labels_folder = 'data/labels/val'

# Make sure both label folders exist; already-existing folders are fine
for _labels_dir in (train_labels_folder, val_labels_folder):
    os.makedirs(_labels_dir, exist_ok=True)

# Function to copy annotations next to their split (the name says "move", but files are copied)
def move_annotations(image_folder, label_folder, annotations_folder=None):
    """Copy the annotation file of every image in ``image_folder`` into ``label_folder``.

    For each regular file in ``image_folder`` the annotation is looked up as
    ``<image name without extension>.txt`` inside ``annotations_folder``. The
    annotation is copied, not moved, so the source folder is left untouched.
    Images without a matching annotation file are skipped and reported in the
    return value instead of being dropped silently.

    Args:
        image_folder: Folder containing the images of one split.
        label_folder: Existing folder that receives the copied annotation files.
        annotations_folder: Folder containing all annotation files. When
            ``None`` (the default), the notebook-level
            ``all_annotations_folder`` is used, as existing callers expect.

    Returns:
        A ``(copied, missing)`` tuple: the number of annotation files copied
        and the sorted list of image file names that had no annotation.
    """
    if annotations_folder is None:
        # Backward-compatible default: fall back to the notebook-level setting.
        annotations_folder = all_annotations_folder

    copied = 0
    missing = []
    for image_file in sorted(os.listdir(image_folder)):
        # Only regular files are treated as images; sub-folders are ignored.
        if not os.path.isfile(os.path.join(image_folder, image_file)):
            continue
        annotation_file = os.path.splitext(image_file)[0] + ".txt"  # Assuming annotations have .txt extension
        source_path = os.path.join(annotations_folder, annotation_file)
        # isfile (not exists): a directory with that name must not reach shutil.copy.
        if os.path.isfile(source_path):
            destination_path = os.path.join(label_folder, annotation_file)
            shutil.copy(source_path, destination_path)
            copied += 1
        else:
            missing.append(image_file)
    return copied, missing

# Training split: copy the annotation of each training image into the train label folder
move_annotations(
    image_folder=train_images_folder,
    label_folder=train_labels_folder,
)
print("Train annotations moved.")

# Validation split: same procedure for the held-out images
move_annotations(
    image_folder=val_images_folder,
    label_folder=val_labels_folder,
)
print("Test annotations moved.")

Train annotations moved.
Test annotations moved.
