# Splitting the Data into Train and Test Sets

In [2]:
# Libraries

import os
import shutil
import random

In [3]:
# === CONFIGURATION ===
base_dir = "c:/Users/user/Documents/Real_time_weapon_detection/processed_data"
output_base = "c:/Users/user/Documents/Real_time_weapon_detection/split_data"
train_ratio = 0.8  # 80% train, 20% test

# File extensions (compared case-insensitively) that count as images.
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")


# === SPLITTING FUNCTION ===
def split_dataset(base_dir, output_base, train_ratio=0.8, seed=42):
    """Copy a class-per-folder image dataset into train/ and test/ subsets.

    Every sub-directory of ``base_dir`` is treated as one class. Its image
    files are shuffled and the first ``int(n * train_ratio)`` of them are
    copied to ``output_base/train/<class>``; the rest go to
    ``output_base/test/<class>``. Any existing ``output_base`` is deleted
    first so that stale files from an earlier run cannot leak into the split.

    Args:
        base_dir: Directory containing one sub-directory per class.
        output_base: Directory to (re)create with ``train`` and ``test``
            sub-directories. Its previous contents are removed.
        train_ratio: Fraction of each class assigned to the training set;
            must lie in ``[0, 1]``.
        seed: Seed passed to ``random.seed`` before shuffling. Pass ``None``
            to leave the global random state untouched.

    Returns:
        A dict mapping each class name to a ``(n_train, n_test)`` tuple.

    Raises:
        ValueError: If ``train_ratio`` is outside ``[0, 1]``, or if
            ``output_base`` is the same as, or a parent of, ``base_dir``.
        FileNotFoundError: If ``base_dir`` is not an existing directory.
    """
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(f"train_ratio must be within [0, 1], got {train_ratio!r}")

    # Validate the source BEFORE deleting anything: a mistyped base_dir must
    # not wipe the previous split and then crash.
    if not os.path.isdir(base_dir):
        raise FileNotFoundError(f"Source directory not found: {base_dir}")

    # Refuse to run when cleaning the output would also delete the source.
    source_real = os.path.normcase(os.path.realpath(base_dir))
    output_real = os.path.normcase(os.path.realpath(output_base))
    if source_real == output_real or source_real.startswith(output_real + os.sep):
        raise ValueError(
            "output_base must not be the same as, or contain, base_dir: "
            f"{output_base!r} vs {base_dir!r}"
        )

    if seed is not None:
        random.seed(seed)

    # === CLEAN START ===
    if os.path.exists(output_base):
        shutil.rmtree(output_base)
    train_root = os.path.join(output_base, "train")
    test_root = os.path.join(output_base, "test")
    os.makedirs(train_root, exist_ok=True)
    os.makedirs(test_root, exist_ok=True)

    counts = {}
    # os.listdir returns entries in arbitrary order; sorting makes the seeded
    # shuffle give the same split on every machine and filesystem.
    for class_name in sorted(os.listdir(base_dir)):
        class_path = os.path.join(base_dir, class_name)
        if not os.path.isdir(class_path):
            continue

        images = sorted(
            f for f in os.listdir(class_path)
            if f.lower().endswith(IMAGE_EXTENSIONS)
        )
        random.shuffle(images)

        split_idx = int(len(images) * train_ratio)
        train_imgs = images[:split_idx]
        test_imgs = images[split_idx:]

        # Create class subfolders
        train_class_dir = os.path.join(train_root, class_name)
        test_class_dir = os.path.join(test_root, class_name)
        os.makedirs(train_class_dir, exist_ok=True)
        os.makedirs(test_class_dir, exist_ok=True)

        # Copy images (copy2 also preserves file metadata such as timestamps)
        for target_dir, subset in ((train_class_dir, train_imgs), (test_class_dir, test_imgs)):
            for img in subset:
                shutil.copy2(os.path.join(class_path, img), os.path.join(target_dir, img))

        counts[class_name] = (len(train_imgs), len(test_imgs))
        print(f"[✓] {class_name}: {len(train_imgs)} train, {len(test_imgs)} test")

    return counts


# Notebook cells and scripts both execute with __name__ == "__main__", so the
# split still runs there; importing this code as a module does not trigger it.
if __name__ == "__main__":
    split_dataset(base_dir, output_base, train_ratio, seed=42)
    print("\n✅ Dataset split completed.")


[✓] human_only: 231 train, 58 test
[✓] human_with_weapon: 312 train, 78 test
[✓] no_threat: 163 train, 41 test
[✓] weapon_only: 246 train, 62 test

✅ Dataset split completed.
