# Augmentation for YOLO Model

In [19]:
import os
import cv2
import json
import albumentations as A

# Define the augmentation pipeline used for the full PCB images.
# Every transform carries its own probability `p`; the bounding boxes passed
# together with the image are transformed along with it (see bbox_params).
augmentations = A.Compose(
    [
        # High probability geometric transformations
        A.HorizontalFlip(p=0.7),
        A.ShiftScaleRotate(shift_limit=0.01, scale_limit=0.02, rotate_limit=10, p=0.7),

        # Random resized crop is currently disabled (kept for reference only)
        #A.RandomResizedCrop(height=500, width=500, scale=(0.8, 1.0), ratio=(0.9, 1.1), p=0.2),
        # Light Gaussian blur
        A.GaussianBlur(blur_limit=(1, 3), p=0.3),
        # Additional perspective distortion
        A.Perspective(scale=(0.01, 0.02), p=0.4)
    ],
    # Boxes are supplied as [x_min, y_min, x_max, y_max] ('pascal_voc'), which is
    # the order diagonal_to_rectangle() produces; their labels are passed in the
    # separate 'category_ids' field.
    bbox_params=A.BboxParams(format='pascal_voc', label_fields=['category_ids'])
)

# File paths.
# Built with os.path.join so the separators match the host OS; on Windows this
# yields exactly the same strings as the previous raw literals
# (.\Our_Dataset\...), while on other systems the path is still split correctly.
input_dir = os.path.join(".", "Our_Dataset", "Combined_Pcb_Images")
output_dir = os.path.join(".", "Our_Dataset", "Augmented_Combined_Data")
# Create the destination up front; exist_ok keeps the cell re-runnable
os.makedirs(output_dir, exist_ok=True)

# Helper function to compute rectangle from diagonal
def diagonal_to_rectangle(diagonal):
    """Turn the two corner points of a diagonal into an axis-aligned box.

    Args:
        diagonal: Sequence of exactly two points, each an ``(x, y)`` pair.
            The corners may be given in any order.

    Returns:
        ``[x_min, y_min, x_max, y_max]`` covering both points.

    Raises:
        ValueError: If ``diagonal`` does not hold two 2-element points.
    """
    corner_a, corner_b = diagonal
    ax, ay = corner_a
    bx, by = corner_b

    # Order each axis independently so the caller may pass either diagonal
    left = min(ax, bx)
    top = min(ay, by)
    right = max(ax, bx)
    bottom = max(ay, by)
    return [left, top, right, bottom]

# Helper function to clip bounding boxes
def clip_bbox(bbox, img_width, img_height):
    """Clamp a box so that every coordinate lies inside the image.

    Args:
        bbox: Four values ``(x_min, y_min, x_max, y_max)``.
        img_width: Upper bound applied to the x coordinates.
        img_height: Upper bound applied to the y coordinates.

    Returns:
        A new 4-element list with x values limited to ``[0, img_width]`` and
        y values limited to ``[0, img_height]``.

    Raises:
        ValueError: If ``bbox`` does not contain exactly four values.
    """
    left, top, right, bottom = bbox
    # Pair every coordinate with the limit of the axis it belongs to
    upper_bounds = (img_width, img_height, img_width, img_height)
    return [
        max(0, min(coordinate, bound))
        for coordinate, bound in zip((left, top, right, bottom), upper_bounds)
    ]

# Process each image and its JSON annotation file.
# For every image in input_dir that has a sibling <name>.json, the rectangle
# annotations are augmented together with the image and both are written to
# output_dir under the same file names.
for filename in sorted(os.listdir(input_dir)):
    # Split on the real extension: str.replace() would also rewrite a
    # '.jpg' / '.png' that happens to occur in the middle of a file name,
    # and would miss upper-case extensions.
    stem, extension = os.path.splitext(filename)
    if extension.lower() not in ('.jpg', '.jpeg', '.png'):
        continue

    image_path = os.path.join(input_dir, filename)
    json_path = os.path.join(input_dir, stem + '.json')

    # Load image. cv2.imread signals failure by returning None, not by raising.
    image = cv2.imread(image_path)
    if image is None:
        print(f"Skipping {filename}: image could not be read")
        continue

    # Load JSON; an image without annotations is skipped instead of aborting the run
    if not os.path.isfile(json_path):
        print(f"Skipping {filename}: annotation file not found ({json_path})")
        continue
    with open(json_path, 'r') as f:
        data = json.load(f)

    # Extract bounding boxes. Only 'rectangle' shapes are handled; any other
    # shape type is not carried over to the augmented annotation file.
    bboxes = []
    category_ids = []
    for shape in data['shapes']:
        if shape['shape_type'] == 'rectangle':
            bboxes.append(diagonal_to_rectangle(shape['points']))
            category_ids.append(shape['label'])

    # Apply augmentations
    try:
        augmented = augmentations(image=image, bboxes=bboxes, category_ids=category_ids)
    except ValueError as e:
        print(f"Skipping {filename} due to invalid bbox: {e}")
        continue

    augmented_image = augmented['image']
    augmented_bboxes = augmented['bboxes']
    # The pipeline may drop boxes (e.g. ones pushed out of the frame). It
    # filters the label field in step, so the labels MUST be taken from the
    # result; pairing the surviving boxes with the original label list would
    # attach wrong labels to them.
    augmented_category_ids = augmented['category_ids']

    # Clip against the size of the image that is actually saved
    out_height, out_width = augmented_image.shape[:2]

    # Convert rectangles back to diagonals
    augmented_shapes = []
    for bbox, category_id in zip(augmented_bboxes, augmented_category_ids):
        # float() keeps the values JSON-serialisable even if the library
        # hands back numpy scalars
        x_min, y_min, x_max, y_max = (
            float(value) for value in clip_bbox(bbox, out_width, out_height)
        )
        augmented_shapes.append(
            {
                "label": category_id,
                "points": [[x_min, y_min], [x_max, y_max]],
                "shape_type": "rectangle"
            }
        )

    # NOTE(review): every other key of the annotation file is copied through
    # unchanged. If these files embed the source picture (e.g. an 'imageData'
    # field), that copy still shows the un-augmented image - confirm whether
    # it should be cleared.
    data['shapes'] = augmented_shapes

    # Save augmented image and JSON
    augmented_image_path = os.path.join(output_dir, filename)
    augmented_json_path = os.path.join(output_dir, stem + '.json')

    cv2.imwrite(augmented_image_path, augmented_image)
    with open(augmented_json_path, 'w') as f:
        json.dump(data, f, indent=4)

print("Data augmentation completed!")


Data augmentation completed!


# Augmentation for Cropped_Components

### Existing Distribution 

In [38]:
def count_images_in_folders(base_folder):
    """Count the image files in every immediate sub-folder of ``base_folder``.

    A file counts as an image when its name ends in ``.png``, ``.jpg`` or
    ``.jpeg``; the comparison ignores case so ``IMG_01.JPG`` is counted too.
    Entries of ``base_folder`` that are not directories are ignored. One
    ``"<folder>: <n> images"`` line is printed per sub-folder.

    Args:
        base_folder: Directory whose sub-folders (one per label) are scanned.

    Returns:
        dict mapping sub-folder name to its image count, in sorted name order.

    Raises:
        OSError: If ``base_folder`` cannot be listed (propagated from os.listdir).
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    fault_counts = {}

    # sorted(): os.listdir order is arbitrary, sorting keeps the report stable
    for label_folder in sorted(os.listdir(base_folder)):
        label_path = os.path.join(base_folder, label_folder)

        # Only directories represent a label
        if not os.path.isdir(label_path):
            continue

        fault_counts[label_folder] = sum(
            1
            for file in os.listdir(label_path)
            if file.lower().endswith(image_extensions)
        )

    # Print the results
    for fault, count in fault_counts.items():
        print(f"{fault}: {count} images")

    return fault_counts

# Folder holding the cropped component images, one sub-folder per fault label.
# os.path.join gives the same string as the previous Windows literal on Windows
# and a valid path on other systems.
output_folder = os.path.join(".", "Our_Dataset", "Cropped_Components")
fault_image_counts = count_images_in_folders(output_folder)

Component Placing Problem: 559 images
Good Solder: 1561 images
Improper Solder: 564 images
Missing Component: 369 images
Tombstone: 49 images
Unknown fault: 39 images
Wrong Component: 116 images


### Augment to the required distribution

In [43]:
import os
import shutil
import cv2
import albumentations as A
import random

# Define the classes and their target image counts.
# Classes with fewer images than the target are topped up with augmented
# copies; classes with more are randomly reduced to the target.
# Folders that are not listed here (e.g. "Unknown fault") are still copied to
# the output directory but are left at their original size.
custom_targets = {
    "Component Placing Problem": 800,
    "Good Solder": 1000,
    "Improper Solder": 800,
    "Missing Component": 500,
    "Tombstone": 100,
    "Wrong Component": 200
}

# Define augmentation pipeline.
# NOTE: this rebinds the name `augmentations` that the YOLO cell above also
# assigns. This variant has no bbox_params because the cropped component
# images are augmented without bounding boxes.
augmentations = A.Compose(
    [
        # Geometric jitter
        A.HorizontalFlip(p=0.7),
        A.ShiftScaleRotate(shift_limit=0.01, scale_limit=0.02, rotate_limit=10, p=0.7),
        # Light blur and mild perspective distortion
        A.GaussianBlur(blur_limit=(1, 3), p=0.3),
        A.Perspective(scale=(0.01, 0.02), p=0.2),
    ]
)

# Define paths.
# The previous literals combined a raw-string prefix with escaped backslashes,
# which put doubled separators into the path. os.path.join builds the intended
# .\Our_Dataset\... path on Windows and a valid path on other systems.
input_dir = os.path.join(".", "Our_Dataset", "Cropped_Components")
output_dir = os.path.join(".", "Our_Dataset", "Augmented_Cropped_Components")
# Create the destination up front; exist_ok keeps the cell re-runnable
os.makedirs(output_dir, exist_ok=True)

# Mirror every sub-folder of input_dir into output_dir.
# dirs_exist_ok=True merges into a destination that already exists, so files
# left in output_dir by an earlier run stay in place.
for folder_name in os.listdir(input_dir):
    src_folder = os.path.join(input_dir, folder_name)
    dest_folder = os.path.join(output_dir, folder_name)

    # Plain files sitting directly in input_dir are not copied
    if not os.path.isdir(src_folder):
        continue

    shutil.copytree(src=src_folder, dst=dest_folder, dirs_exist_ok=True)

# Process each class: bring every class folder in output_dir to its target
# size, augmenting when there are too few images and deleting when too many.

# Same extensions count_images_in_folders reports, matched case-insensitively,
# so the counts used here agree with the before/after distribution cells.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

# Private, seeded RNG: which images get augmented / removed is repeatable from
# run to run. (The albumentations transforms draw their own random numbers.)
selection_rng = random.Random(42)


def _list_images(folder):
    """Return the image file names in `folder`, sorted for a stable order."""
    return sorted(
        name for name in os.listdir(folder)
        if name.lower().endswith(IMAGE_EXTENSIONS)
    )


for class_name, target_count in custom_targets.items():
    class_folder = os.path.join(output_dir, class_name)

    if not os.path.isdir(class_folder):
        print(f"Skipping class '{class_name}': no folder at {class_folder}")
        continue

    images = _list_images(class_folder)
    current_count = len(images)

    print(f"Processing class '{class_name}' with {current_count} images...")

    if current_count < target_count:
        # Augment images to reach the target count.
        # Draw sources only from the originals (files that also exist in the
        # input folder): output_dir can still contain *_augN files from an
        # earlier run, and augmenting those again would stack distortions.
        source_folder = os.path.join(input_dir, class_name)
        originals = set(_list_images(source_folder)) if os.path.isdir(source_folder) else set()
        candidates = [name for name in images if name in originals] or list(images)

        next_suffix = current_count
        while current_count < target_count and candidates:
            image_name = selection_rng.choice(candidates)
            image_path = os.path.join(class_folder, image_name)

            # Load image; cv2.imread returns None for unreadable files
            image = cv2.imread(image_path)
            if image is None:
                print(f"Skipping unreadable image: {image_path}")
                # Drop it so the loop cannot keep picking a broken file
                candidates.remove(image_name)
                continue

            # Apply augmentations
            augmented_image = augmentations(image=image)['image']

            # Pick a file name that is not taken yet, otherwise a stale file
            # would be overwritten and the folder would end up short
            stem = os.path.splitext(image_name)[0]
            augmented_image_path = os.path.join(class_folder, f"{stem}_aug{next_suffix}.jpg")
            while os.path.exists(augmented_image_path):
                next_suffix += 1
                augmented_image_path = os.path.join(class_folder, f"{stem}_aug{next_suffix}.jpg")

            # Save augmented image; imwrite reports failure via its return value
            if not cv2.imwrite(augmented_image_path, augmented_image):
                raise OSError(f"Could not write augmented image: {augmented_image_path}")

            next_suffix += 1
            current_count += 1

        if current_count < target_count:
            print(
                f"Warning: class '{class_name}' has no usable source images; "
                f"stopped at {current_count} of {target_count}."
            )

    elif current_count > target_count:
        # Undersample images to reach the target count
        for image_name in selection_rng.sample(images, current_count - target_count):
            os.remove(os.path.join(class_folder, image_name))
        current_count = target_count

    # Report the size actually reached, which can be below the target when a
    # class had nothing to augment from
    print(f"Finished processing class '{class_name}' to {current_count} images.\n")

print("All classes processed as per the custom targets!")


Processing class 'Component Placing Problem' with 559 images...
Finished processing class 'Component Placing Problem' to 800 images.

Processing class 'Good Solder' with 1561 images...
Finished processing class 'Good Solder' to 1000 images.

Processing class 'Improper Solder' with 564 images...
Finished processing class 'Improper Solder' to 800 images.

Processing class 'Missing Component' with 369 images...
Finished processing class 'Missing Component' to 500 images.

Processing class 'Tombstone' with 89 images...
Finished processing class 'Tombstone' to 100 images.

Processing class 'Wrong Component' with 116 images...
Finished processing class 'Wrong Component' to 200 images.

All classes processed as per the custom targets!


In [45]:
def count_images_in_folders(base_folder):
    """Count the image files in every immediate sub-folder of ``base_folder``.

    NOTE: this repeats the definition from the "Existing Distribution" cell so
    that this cell can be run on its own; keep the two in sync.

    A file counts as an image when its name ends in ``.png``, ``.jpg`` or
    ``.jpeg``; the comparison ignores case so ``IMG_01.JPG`` is counted too.
    Entries of ``base_folder`` that are not directories are ignored. One
    ``"<folder>: <n> images"`` line is printed per sub-folder.

    Args:
        base_folder: Directory whose sub-folders (one per label) are scanned.

    Returns:
        dict mapping sub-folder name to its image count, in sorted name order.

    Raises:
        OSError: If ``base_folder`` cannot be listed (propagated from os.listdir).
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    fault_counts = {}

    # sorted(): os.listdir order is arbitrary, sorting keeps the report stable
    for label_folder in sorted(os.listdir(base_folder)):
        label_path = os.path.join(base_folder, label_folder)

        # Only directories represent a label
        if not os.path.isdir(label_path):
            continue

        fault_counts[label_folder] = sum(
            1
            for file in os.listdir(label_path)
            if file.lower().endswith(image_extensions)
        )

    # Print the results
    for fault, count in fault_counts.items():
        print(f"{fault}: {count} images")

    return fault_counts

# Folder holding the balanced (augmented / undersampled) component images.
# os.path.join gives the same string as the previous Windows literal on Windows
# and a valid path on other systems.
output_folder = os.path.join(".", "Our_Dataset", "Augmented_Cropped_Components")
fault_image_counts = count_images_in_folders(output_folder)

Component Placing Problem: 800 images
Good Solder: 1000 images
Improper Solder: 800 images
Missing Component: 500 images
Tombstone: 100 images
Unknown fault: 39 images
Wrong Component: 200 images
