CREATING A SINGLE COMBINED DATASET

In [32]:
import os
import shutil
from glob import glob

In [33]:
def update_labels(label_file, class_mapping):
    """
    Rewrite the class id at the start of every annotation line using a mapping.

    Each non-blank line of the label file is split on whitespace; the first
    token is parsed as an integer class id and replaced with
    ``class_mapping[id]``. The remaining tokens are kept as-is and re-joined
    with single spaces. Blank or whitespace-only lines are skipped.

    Args:
        label_file (str): Path to the label file.
        class_mapping (dict): Dictionary mapping original integer class ids to new ones.

    Returns:
        str: Updated content of the label file, one annotation per line,
        without a trailing newline.

    Raises:
        KeyError: If a class id in the file has no entry in ``class_mapping``.
        ValueError: If the first token of a line is not an integer.
    """
    updated_lines = []
    with open(label_file, 'r') as f:
        for line_no, line in enumerate(f, start=1):
            parts = line.split()
            # Blank lines (commonly a trailing newline at the end of the file)
            # carry no annotation; indexing parts[0] on them would fail.
            if not parts:
                continue

            original_id = int(parts[0])
            try:
                new_id = class_mapping[original_id]
            except KeyError:
                # Re-raise with enough context to locate the offending annotation.
                raise KeyError(
                    f"{label_file}, line {line_no}: class id {original_id} "
                    f"has no entry in class_mapping"
                ) from None

            updated_lines.append(" ".join([str(new_id), *parts[1:]]))

    return "\n".join(updated_lines)

In [34]:
def generate_yaml_file(output_dir, num_classes, class_names):
    """
    Create a YAML configuration file for the combined dataset.

    Writes ``dataset_config.yaml`` into ``output_dir`` (creating the directory
    if needed) and prints the path of the written file.

    Args:
        output_dir (str): Output directory path for the combined dataset.
        num_classes (int): Total number of classes in the combined dataset.
        class_names (list): Class names in class-id order. Either a flat list
            of names or a list of per-dataset lists/tuples of names; one level
            of nesting is flattened in order.
    """
    # Accept per-dataset groups of names: quoting a whole sub-list as a single
    # name would produce a malformed `names` entry.
    flat_names = []
    for entry in class_names:
        if isinstance(entry, (list, tuple)):
            flat_names.extend(entry)
        else:
            flat_names.append(entry)

    if len(flat_names) != num_classes:
        print(
            f"Warning: nc is {num_classes} but {len(flat_names)} class names were provided."
        )

    # Inside a YAML single-quoted scalar a literal quote is written as two quotes.
    quoted_names = ", ".join(
        "'" + str(name).replace("'", "''") + "'" for name in flat_names
    )

    yaml_content = f"""
# Combined Dataset Configuration
path: {output_dir}
train: {output_dir}/train/images
val: {output_dir}/val/images
test: {output_dir}/test/images

# Number of classes
nc: {num_classes}

# Class names
names: [{quoted_names}]
"""
    os.makedirs(output_dir, exist_ok=True)
    yaml_path = os.path.join(output_dir, "dataset_config.yaml")
    # Explicit UTF-8 so non-ASCII class names are written the same on every platform.
    with open(yaml_path, 'w', encoding='utf-8') as yaml_file:
        yaml_file.write(yaml_content)
    print(f"YAML configuration file created at: {yaml_path}")

In [35]:
def combine_datasets(base_dir, dataset_dirs, num_classes_list, class_names, output_dir):
    """
    Combine multiple datasets into a single dataset with unique class labels.

    For every dataset, the ``train``, ``val`` and ``test`` sub-folders are
    read from ``base_dir/<dataset>/<split>/images`` and ``.../labels``. Each
    file found in an images folder is copied to ``output_dir/<split>/images``
    under a new numeric name taken from a counter shared by all datasets and
    splits. If a ``.txt`` file with the same base name exists in the labels
    folder, its class ids are shifted by the number of classes of all
    previously processed datasets and the result is written to
    ``output_dir/<split>/labels`` under the matching numeric name. Images
    without a label file are still copied. Finally a YAML configuration file
    is generated in ``output_dir``.

    Args:
        base_dir (str): Base directory path where datasets are located.
        dataset_dirs (list): List of dataset directories to combine.
        num_classes_list (list): List containing the number of classes for each dataset.
        class_names (list): List of all class names for the combined dataset.
        output_dir (str): Output directory path for the combined dataset.

    Raises:
        ValueError: If ``dataset_dirs`` and ``num_classes_list`` differ in length.
    """
    # Fail before touching the filesystem instead of half-way through copying.
    if len(dataset_dirs) != len(num_classes_list):
        raise ValueError(
            f"dataset_dirs has {len(dataset_dirs)} entries but "
            f"num_classes_list has {len(num_classes_list)}"
        )

    splits = ('train', 'val', 'test')

    # Offset added to the class ids of the dataset currently being processed
    current_max_class = 0
    global_image_counter = 1  # Global counter for unique filenames

    # Prepare the output directories
    for folder in splits:
        os.makedirs(os.path.join(output_dir, folder, 'images'), exist_ok=True)
        os.makedirs(os.path.join(output_dir, folder, 'labels'), exist_ok=True)

    # Process each dataset
    for dataset, num_classes in zip(dataset_dirs, num_classes_list):
        print(f"Processing dataset: {dataset}")

        # Shift this dataset's local ids past those of the earlier datasets
        class_mapping = {i: i + current_max_class for i in range(num_classes)}

        for folder in splits:
            image_dir = os.path.join(base_dir, dataset, folder, 'images')
            label_dir = os.path.join(base_dir, dataset, folder, 'labels')

            # Sorted so the numbering is reproducible between runs; glob()
            # itself returns entries in arbitrary order.
            for image_path in sorted(glob(os.path.join(image_dir, '*'))):
                # '*' also matches sub-directories, which shutil.copy cannot copy.
                if not os.path.isfile(image_path):
                    continue

                stem, ext = os.path.splitext(os.path.basename(image_path))

                # Keep the real extension so the new name matches the file
                # format; fall back to .jpg only when there is none.
                unique_image_name = f"{global_image_counter}{ext or '.jpg'}"
                dest_image_path = os.path.join(output_dir, folder, 'images', unique_image_name)
                shutil.copy(image_path, dest_image_path)

                # Find the corresponding label file and rename it
                label_path = os.path.join(label_dir, stem + '.txt')
                if os.path.exists(label_path):
                    # The label name must match the image's numeric name
                    unique_label_name = f"{global_image_counter}.txt"
                    dest_label_path = os.path.join(output_dir, folder, 'labels', unique_label_name)

                    # Update label content with the class mapping
                    updated_content = update_labels(label_path, class_mapping)

                    with open(dest_label_path, 'w') as f:
                        f.write(updated_content)

                global_image_counter += 1  # Next file gets the next number

        # Update current_max_class for the next dataset
        current_max_class += num_classes

    # Create YAML configuration file; current_max_class is now the total class count
    generate_yaml_file(output_dir, current_max_class, class_names)

In [36]:
# Base directory that holds the individual dataset folders
path = r"E:\My Research Project\CODE\DATA"

In [37]:
# Dataset folder names, in the order they are merged
dataset_dirs = [
    "Excavators",
    "People and Ladders",
    "Personal Protective Equipment",
]

In [38]:
# Destination folder for the merged dataset
output_dir = (
    r"E:\My Research Project\CODE\DATA"
    r"\Combined_Dataset"
)

In [39]:
# Class names of each source dataset, in the same order as the dataset folders.
class_names_per_dataset = [
    ['EXCAVATORS', 'dump truck', 'wheel loader'],
    ['Ladder', 'Person'],
    [
        'Fall-Detected', 'Gloves', 'Goggles', 'Hardhat', 'Ladder', 'Mask',
        'NO-Gloves', 'NO-Goggles', 'NO-Hardhat', 'NO-Mask', 'NO-Safety Vest',
        'Person', 'Safety Cone', 'Safety Vest',
    ],
]

# Derive the per-dataset class counts so they cannot drift from the name lists.
num_classes_list = [len(names) for names in class_names_per_dataset]

# Flat list in combined class-id order: the YAML `names` entry needs one string
# per class, not one sub-list per dataset.
class_names = [name for names in class_names_per_dataset for name in names]


In [40]:
# Merge all datasets into output_dir and write the dataset YAML
combine_datasets(
    base_dir=path,
    dataset_dirs=dataset_dirs,
    num_classes_list=num_classes_list,
    class_names=class_names,
    output_dir=output_dir,
)
print("Datasets combined successfully.")

Processing dataset: Excavators
Processing dataset: People and Ladders
Processing dataset: Personal Protective Equipment
YAML configuration file created at: E:\My Research Project\CODE\DATA\Combined_Dataset\dataset_config.yaml
Datasets combined successfully.
