In [15]:
import os
import shutil
import random
from tqdm import tqdm

# Define paths
DATASET_DIR =  "Data/dataset2"

ANNOTATIONS_DIR = "VOC_lab2"  # Folder with YOLO TXT labels

# Define new dataset split folders
TRAIN_DIR = "images/Training_data"
VAL_DIR = "images/Validation_data"
TEST_DIR = "images/Testing_data"

TRAIN_LABELS = "labels/Training_data"
VAL_LABELS = "labels/Validation_data"
TEST_LABELS = "labels/Testing_data"

# Ensure folders exist
for folder in [TRAIN_DIR, VAL_DIR, TEST_DIR, TRAIN_LABELS, VAL_LABELS, TEST_LABELS]:
    os.makedirs(folder, exist_ok=True)

# Get all image files
image_files = [f for f in os.listdir(DATASET_DIR) if f.endswith(('.jpg', '.png', '.jpeg'))]
random.shuffle(image_files)  # Shuffle to randomize split

# Define split numbers (Modify as needed)
num_train = 1836  # 70% for training
num_val = 229    # 20% for validation
num_test = 229  # Remaining for test

# Assign images to each set
train_images = image_files[:num_train]
val_images = image_files[num_train:num_train + num_val]
test_images = image_files[num_train + num_val:]

# Function to move images & annotations
def move_files(image_list, dest_img_folder, dest_label_folder):
    for img_file in tqdm(image_list, desc=f"Moving files to {dest_img_folder}"):
        src_img = os.path.join(DATASET_DIR, img_file)
        dst_img = os.path.join(dest_img_folder, img_file)
        shutil.move(src_img, dst_img)

        # Move corresponding label file (if exists)
        label_file = os.path.splitext(img_file)[0] + ".txt"
        src_label = os.path.join(ANNOTATIONS_DIR, label_file)
        dst_label = os.path.join(dest_label_folder, label_file)

        if os.path.exists(src_label):
            shutil.move(src_label, dst_label)

# Move images and labels to respective folders
move_files(train_images, TRAIN_DIR, TRAIN_LABELS)
move_files(val_images, VAL_DIR, VAL_LABELS)
move_files(test_images, TEST_DIR, TEST_LABELS)

print("✅ Dataset split into Train, Validation, and Test sets successfully!")
print(f"🔹 Train: {num_train} images\n🔹 Val: {num_val} images\n🔹 Test: {num_test} images")


Moving files to images/Training_data: 100%|███████████████████████████████████████| 1836/1836 [00:02<00:00, 626.18it/s]
Moving files to images/Validation_data: 100%|███████████████████████████████████████| 229/229 [00:00<00:00, 608.04it/s]
Moving files to images/Testing_data: 100%|██████████████████████████████████████████| 235/235 [00:00<00:00, 535.33it/s]

✅ Dataset split into Train, Validation, and Test sets successfully!
🔹 Train: 1836 images
🔹 Val: 229 images
🔹 Test: 229 images





In [14]:
import os
import shutil
import random
from tqdm import tqdm

# Define paths
DATASET_DIR =  "Data/NEU-DET/NEU-DET/IMAGES"

ANNOTATIONS_DIR = "VOC_lab"  # Folder with YOLO TXT labels

# Define new dataset split folders
TRAIN_DIR = "images/Training_data"
VAL_DIR = "images/Validation_data"
TEST_DIR = "images/Testing_data"

TRAIN_LABELS = "labels/Training_data"
VAL_LABELS = "labels/Validation_data"
TEST_LABELS = "labels/Testing_data"

# Ensure folders exist
for folder in [TRAIN_DIR, VAL_DIR, TEST_DIR, TRAIN_LABELS, VAL_LABELS, TEST_LABELS]:
    os.makedirs(folder, exist_ok=True)

# Get all image files
image_files = [f for f in os.listdir(DATASET_DIR) if f.endswith(('.jpg', '.png', '.jpeg'))]
random.shuffle(image_files)  # Shuffle to randomize split

# Define split numbers (Modify as needed)
num_train = 1440  # 70% for training
num_val = 180   # 20% for validation
num_test = 180  # Remaining for test

# Assign images to each set
train_images = image_files[:num_train]
val_images = image_files[num_train:num_train + num_val]
test_images = image_files[num_train + num_val:]

# Function to move images & annotations
def move_files(image_list, dest_img_folder, dest_label_folder):
    for img_file in tqdm(image_list, desc=f"Moving files to {dest_img_folder}"):
        src_img = os.path.join(DATASET_DIR, img_file)
        dst_img = os.path.join(dest_img_folder, img_file)
        shutil.move(src_img, dst_img)

        # Move corresponding label file (if exists)
        label_file = os.path.splitext(img_file)[0] + ".txt"
        src_label = os.path.join(ANNOTATIONS_DIR, label_file)
        dst_label = os.path.join(dest_label_folder, label_file)

        if os.path.exists(src_label):
            shutil.move(src_label, dst_label)

# Move images and labels to respective folders
move_files(train_images, TRAIN_DIR, TRAIN_LABELS)
move_files(val_images, VAL_DIR, VAL_LABELS)
move_files(test_images, TEST_DIR, TEST_LABELS)

print("✅ Dataset split into Train, Validation, and Test sets successfully!")
print(f"🔹 Train: {num_train} images\n🔹 Val: {num_val} images\n🔹 Test: {num_test} images")

Moving files to images/Training_data: 100%|███████████████████████████████████████| 1440/1440 [00:01<00:00, 779.33it/s]
Moving files to images/Validation_data: 100%|███████████████████████████████████████| 180/180 [00:00<00:00, 636.30it/s]
Moving files to images/Testing_data: 100%|██████████████████████████████████████████| 180/180 [00:00<00:00, 770.41it/s]

✅ Dataset split into Train, Validation, and Test sets successfully!
🔹 Train: 1440 images
🔹 Val: 180 images
🔹 Test: 180 images





In [8]:
import os
import xml.etree.ElementTree as ET
from tqdm import tqdm

# Define paths
VOC_DIR = "Data/NEU-DET/NEU-DET/ANNOTATIONS"  # Path where your XML files are stored
YOLO_LABELS_DIR = "VOC_lab"  # Where YOLO TXT annotations will be saved
IMAGE_DIR = "Data/NEU-DET/NEU-DET/IMAGES"  # Path where your images are stored

# Define class names (Modify according to your dataset)
CLASS_NAMES = ["crazing", "inculsion","patches","pitted_surface","rolled_in_scale","scratches",]

# Create labels directory if not exists
os.makedirs(YOLO_LABELS_DIR, exist_ok=True)

# Function to convert VOC (Pascal) to YOLO format
def convert_voc_to_yolo(xml_file):
    tree = ET.parse(xml_file)
    root = tree.getroot()

    # Get image dimensions
    size = root.find("size")
    img_w = int(size.find("width").text)
    img_h = int(size.find("height").text)

    yolo_txt = ""

    for obj in root.findall("object"):
        class_name = obj.find("name").text
        if class_name not in CLASS_NAMES:
            continue

        class_id = CLASS_NAMES.index(class_name)
        bbox = obj.find("bndbox")
        x_min = int(bbox.find("xmin").text)
        y_min = int(bbox.find("ymin").text)
        x_max = int(bbox.find("xmax").text)
        y_max = int(bbox.find("ymax").text)

        # Convert to YOLO format (normalized values)
        x_center = ((x_min + x_max) / 2) / img_w
        y_center = ((y_min + y_max) / 2) / img_h
        width = (x_max - x_min) / img_w
        height = (y_max - y_min) / img_h

        yolo_txt += f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n"

    return yolo_txt


# Process all XML files
xml_files = [f for f in os.listdir(VOC_DIR) if f.endswith(".xml")]
for xml_file in tqdm(xml_files, desc="Converting Annotations"):
    xml_path = os.path.join(VOC_DIR, xml_file)
    yolo_annotation = convert_voc_to_yolo(xml_path)

    if yolo_annotation:
        txt_filename = os.path.splitext(xml_file)[0] + ".txt"
        txt_path = os.path.join(YOLO_LABELS_DIR, txt_filename)
        with open(txt_path, "w") as f:
            f.write(yolo_annotation)

print("✅ Conversion Completed: XML → YOLO format!")


Converting Annotations: 100%|████████████████████████████████████████████████████| 1800/1800 [00:01<00:00, 1432.86it/s]

✅ Conversion Completed: XML → YOLO format!





In [12]:
import os
import xml.etree.ElementTree as ET
from tqdm import tqdm

# Define paths
VOC_DIR = "Data/lable"  # Path where your XML files are stored
YOLO_LABELS_DIR = "VOC_lab2"  # Where YOLO TXT annotations will be saved
IMAGE_DIR = "Data/dataset2"  # Path where your images are stored

# Define class names (Modify according to your dataset)
CLASS_NAMES = ["1_chongkong", "2_hanfeng","3_yueyawan","4_shuiban","5_youban","6_siban","7_yiwu","8_yahen","9_zhehen","10_yaozhed"]

# Create labels directory if not exists
os.makedirs(YOLO_LABELS_DIR, exist_ok=True)

# Function to convert VOC (Pascal) to YOLO format
def convert_voc_to_yolo(xml_file):
    tree = ET.parse(xml_file)
    root = tree.getroot()

    # Get image dimensions
    size = root.find("size")
    img_w = int(size.find("width").text)
    img_h = int(size.find("height").text)

    yolo_txt = ""

    for obj in root.findall("object"):
        class_name = obj.find("name").text
        if class_name not in CLASS_NAMES:
            continue

        class_id = CLASS_NAMES.index(class_name)
        bbox = obj.find("bndbox")
        x_min = int(bbox.find("xmin").text)
        y_min = int(bbox.find("ymin").text)
        x_max = int(bbox.find("xmax").text)
        y_max = int(bbox.find("ymax").text)

        # Convert to YOLO format (normalized values)
        x_center = ((x_min + x_max) / 2) / img_w
        y_center = ((y_min + y_max) / 2) / img_h
        width = (x_max - x_min) / img_w
        height = (y_max - y_min) / img_h

        yolo_txt += f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n"

    return yolo_txt


# Process all XML files
xml_files = [f for f in os.listdir(VOC_DIR) if f.endswith(".xml")]
for xml_file in tqdm(xml_files, desc="Converting Annotations"):
    xml_path = os.path.join(VOC_DIR, xml_file)
    yolo_annotation = convert_voc_to_yolo(xml_path)

    if yolo_annotation:
        txt_filename = os.path.splitext(xml_file)[0] + ".txt"
        txt_path = os.path.join(YOLO_LABELS_DIR, txt_filename)
        with open(txt_path, "w") as f:
            f.write(yolo_annotation)

print("✅ Conversion Completed: XML → YOLO format!")


Converting Annotations: 100%|████████████████████████████████████████████████████| 2294/2294 [00:01<00:00, 1343.22it/s]

✅ Conversion Completed: XML → YOLO format!



