In [30]:
import cv2
import os
import re
import numpy as np
import albumentations as A
import shutil
import os
import random
import shutil

In [None]:
directory = '../../data/starting_dataset/images'

# Rename the raw sheet scans "Лист (N).jpg" to "list_N.jpg"; later cells look
# the full images up under the list_N.jpg name.
for filename in sorted(os.listdir(directory)):
    # fullmatch: the whole name has to fit the pattern, so something like
    # "Лист (3).jpg.bak" is not renamed on top of the real "list_3.jpg".
    match = re.fullmatch(r'Лист \((\d+)\)\.jpg', filename)
    if not match:
        print(f'Skipped: {filename}')
        continue

    number = match.group(1)
    new_filename = f'list_{number}.jpg'
    old_file = os.path.join(directory, filename)
    new_file = os.path.join(directory, new_filename)

    # os.rename replaces an existing destination silently on POSIX (and raises
    # on Windows); refuse to overwrite in both cases and report it instead.
    if os.path.exists(new_file):
        print(f'Skipped: {filename} ({new_filename} already exists)')
        continue

    os.rename(old_file, new_file)

In [None]:
directory = '../../data/starting_dataset/defects'

# Rename defect crops "<k>_Лист (N)_<id>.jpg" to "<k>_list_N_<id>.jpg", the
# naming scheme the label-generation cell parses.
for filename in sorted(os.listdir(directory)):
    # fullmatch: require the name to END in ".jpg" instead of merely starting
    # with something that looks like a crop name.
    match = re.fullmatch(r'(\d+)_Лист \((\d+)\)_(.+)\.jpg', filename)
    if not match:
        print(f'Skipped: {filename}')
        continue

    part1 = match.group(1)    # leading number of the crop file name
    part2 = match.group(2)    # number of the sheet the crop belongs to
    crop_id = match.group(3)  # trailing identifier, kept verbatim
    new_filename = f'{part1}_list_{part2}_{crop_id}.jpg'
    old_file = os.path.join(directory, filename)
    new_file = os.path.join(directory, new_filename)

    # os.rename replaces an existing destination silently on POSIX (and raises
    # on Windows); refuse to overwrite in both cases and report it instead.
    if os.path.exists(new_file):
        print(f'Skipped: {filename} ({new_filename} already exists)')
        continue

    os.rename(old_file, new_file)


In [4]:
def calculate_yolo_bbox(original_shape, cropped_shape, top_left):
    """Convert a crop's position inside an image into a normalised YOLO box.

    Args:
        original_shape: shape of the full image; only the first two entries
            (height, width) are used.
        cropped_shape: shape of the crop; only the first two entries
            (height, width) are used.
        top_left: (x, y) pixel position of the crop's top-left corner inside
            the full image.

    Returns:
        Tuple (center_x, center_y, width, height), each divided by the
        corresponding dimension of the full image.
    """
    full_h, full_w = original_shape[:2]
    patch_h, patch_w = cropped_shape[:2]

    # Box centre = top-left corner shifted by half the crop size, then normalised.
    return (
        (top_left[0] + patch_w / 2) / full_w,
        (top_left[1] + patch_h / 2) / full_h,
        patch_w / full_w,
        patch_h / full_h,
    )

In [14]:
# Folders used by the label-generation step.
original_images_dir = '../../data/starting_dataset/images'   # full sheet images (list_N.jpg)
cropped_images_dir = '../../data/starting_dataset/defects'   # defect crops cut from those images
output_dir = '../../data/starting_dataset/labels'            # YOLO .txt label files are written here

# exist_ok=True replaces the separate exists() check: no check-then-create race,
# and the same idiom the later cells of this notebook already use.
os.makedirs(output_dir, exist_ok=True)

In [15]:
# Locate every defect crop inside its source sheet with template matching and
# store the hit as a YOLO label line (class 0) in list_<N>.txt.

# TM_CCOEFF_NORMED scores lie in [-1, 1], 1 being a perfect match. matchTemplate
# always returns *some* best location, so a low score is the only hint that the
# crop was not really found. 0.8 is a heuristic threshold - tune it for the data.
MIN_MATCH_SCORE = 0.8

# Label files already (re)started during this run. The first write to a file
# truncates it, so re-running the cell does not append duplicate boxes.
started_label_files = set()

for filename in sorted(os.listdir(cropped_images_dir)):
    # fullmatch: the name must end in ".jpg", not just start like a crop name.
    match = re.fullmatch(r'(\d+)_list_(\d+)_(.+)\.jpg', filename)
    if not match:
        print(f"Skipped: {filename}")
        continue

    list_number = match.group(2)
    cropped_image_path = os.path.join(cropped_images_dir, filename)
    original_image_path = os.path.join(original_images_dir, f'list_{list_number}.jpg')

    # cv2.imread returns None (no exception) when a file is missing or unreadable.
    original_image = cv2.imread(original_image_path)
    cropped_image = cv2.imread(cropped_image_path)
    if original_image is None or cropped_image is None:
        print(f"Error: Couldn't load image(s) for {filename}")
        continue

    # matchTemplate raises if the template does not fit inside the image.
    if (cropped_image.shape[0] > original_image.shape[0]
            or cropped_image.shape[1] > original_image.shape[1]):
        print(f"Error: crop is larger than its source image for {filename}")
        continue

    result = cv2.matchTemplate(original_image, cropped_image, cv2.TM_CCOEFF_NORMED)
    _, max_val, _, max_loc = cv2.minMaxLoc(result)
    if max_val < MIN_MATCH_SCORE:
        # Still written (as before), but flagged so the box can be checked by hand.
        print(f"Warning: weak match ({max_val:.3f}) for {filename}")

    yolo_bbox = calculate_yolo_bbox(original_image.shape, cropped_image.shape, max_loc)

    annotation_path = os.path.join(output_dir, f'list_{list_number}.txt')
    mode = 'a' if annotation_path in started_label_files else 'w'
    started_label_files.add(annotation_path)

    with open(annotation_path, mode) as f:
        f.write(f"0 {yolo_bbox[0]} {yolo_bbox[1]} {yolo_bbox[2]} {yolo_bbox[3]}\n")

    print(f"Annotation created for {filename}")

Annotation created for 0_list_114_498bd161-204b-4594-b5b1-2e643de6770b.jpg
Annotation created for 0_list_115_be5c0d43-f8de-4178-ac1d-9b6ad228975e.jpg
Annotation created for 10_list_23_6f2337ce-9aba-44a7-8e97-514f96193162.jpg
Annotation created for 11_list_23_890497ed-9439-4dd6-8490-cea696e8a0b7.jpg
Annotation created for 11_list_96_8cb31424-b0b4-4689-809f-97e9dfd129fe.jpg
Annotation created for 12_list_63_50437b1b-2eb6-43df-95c2-b2c13e21d4de.jpg
Annotation created for 13_list_50_1815bfb2-8f30-411d-b7f5-cfc31d63540a.jpg
Annotation created for 13_list_94_ecbef8b2-72cb-4d6c-9fd5-e60f7071e837.jpg
Annotation created for 14_list_34_6f169b74-0f32-43cc-aff8-4dd3c453d3d2.jpg
Annotation created for 15_list_34_fa118a77-021f-4f13-ba90-12d359a0b551.jpg
Annotation created for 15_list_45_3a6813d9-876a-4a19-8509-df593b46df2d.jpg
Error: Couldn't load image(s) for 15_list_70_b106323e-6026-452e-a4bb-2ce0283a92c7.jpg
Error: Couldn't load image(s) for 15_list_87_e6c246cc-b496-4f0b-aab4-c9de15e7fd00.jpg
Ann

отсутствуют картинки 110, 10, 116, 18, 70, 87

In [21]:
images_dir = '../../data/starting_dataset/images'
labels_dir = '../../data/starting_dataset/labels'

# Give every image without annotations an empty label file: the augmentation
# step copies a label file for each .jpg and fails when one is missing.
os.makedirs(labels_dir, exist_ok=True)
for filename in os.listdir(images_dir):
    # splitext + extension check instead of str.replace: non-image entries no
    # longer get a stray "label", and a ".jpg" in the middle of a name is left alone.
    base_name, extension = os.path.splitext(filename)
    if extension != '.jpg':
        continue

    file_path = os.path.join(labels_dir, base_name + '.txt')
    if not os.path.exists(file_path):
        # Opening in 'w' mode and closing immediately creates an empty file.
        with open(file_path, 'w'):
            pass

In [31]:
# Augmentation pipeline: brightness/contrast, hue/saturation/value and blur,
# each applied independently with the given probability. These transforms only
# alter pixel values, which is why the augmentation step below reuses the
# original label lines unchanged.
pixel_transforms = [
    A.RandomBrightnessContrast(p=0.2),
    A.HueSaturationValue(p=0.2),
    A.GaussianBlur(p=0.1),
]
aug = A.Compose(pixel_transforms)

def read_label_file(file_path):
    """Return the lines of a label file as a list of strings (line endings kept)."""
    with open(file_path, 'r') as handle:
        return handle.readlines()

def write_label_file(file_path, labels):
    """Write the given label lines to file_path, replacing any existing content.

    The lines are written exactly as passed in; no line endings are added.
    """
    with open(file_path, 'w') as handle:
        for line in labels:
            handle.write(line)

def augment_images_and_labels(image_dir, label_dir, output_image_dir, output_label_dir, num_augmentations=10):
    """Copy a dataset and add augmented variants of every image.

    For each ``.jpg`` in ``image_dir`` the image and its ``.txt`` label are
    copied to the output folders, then ``num_augmentations`` variants produced
    by the module-level ``aug`` pipeline are written as
    ``<name>_aug_<i>.jpg`` / ``<name>_aug_<i>.txt``.

    The label lines are reused verbatim for every variant, which is only valid
    while ``aug`` contains no transform that moves or resizes image content.

    Args:
        image_dir: folder with the source ``.jpg`` images.
        label_dir: folder with the matching ``.txt`` label files.
        output_image_dir: destination folder for images (created if missing).
        output_label_dir: destination folder for labels (created if missing).
        num_augmentations: number of augmented variants per source image.

    Images that cannot be read, or that have no label file, are reported and
    skipped instead of aborting the run halfway through the dataset.
    """
    os.makedirs(output_image_dir, exist_ok=True)
    os.makedirs(output_label_dir, exist_ok=True)

    image_files = sorted(f for f in os.listdir(image_dir) if f.endswith('.jpg'))

    for image_file in image_files:
        base_name = os.path.splitext(image_file)[0]
        image_path = os.path.join(image_dir, image_file)
        label_path = os.path.join(label_dir, base_name + '.txt')

        if not os.path.exists(label_path):
            print(f"Label file not found for {image_file}")
            continue

        # cv2.imread signals failure by returning None, not by raising.
        image = cv2.imread(image_path)
        if image is None:
            print(f"Error: Couldn't load image {image_file}")
            continue

        shutil.copy(image_path, os.path.join(output_image_dir, image_file))
        shutil.copy(label_path, os.path.join(output_label_dir, base_name + '.txt'))

        labels = read_label_file(label_path)

        # OpenCV loads images as BGR while Albumentations documents RGB input;
        # convert once here and convert each result back before saving.
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        for i in range(num_augmentations):
            augmented_image = aug(image=image_rgb)['image']

            # Guards against a pipeline that returns something other than an
            # array (for example after a tensor-conversion step is added).
            if not isinstance(augmented_image, np.ndarray):
                print(f"Augmentation failed for {image_file}")
                continue

            aug_image_path = os.path.join(output_image_dir, f"{base_name}_aug_{i}.jpg")
            augmented_bgr = cv2.cvtColor(augmented_image, cv2.COLOR_RGB2BGR)

            # cv2.imwrite returns False when nothing was written; do not leave
            # a label behind that has no image.
            if not cv2.imwrite(aug_image_path, augmented_bgr):
                print(f"Augmentation failed for {image_file}")
                continue

            aug_label_path = os.path.join(output_label_dir, f"{base_name}_aug_{i}.txt")
            write_label_file(aug_label_path, labels)

# Source dataset (read) and augmented dataset (written).
image_dir = '../../data/starting_dataset/images'
label_dir = '../../data/starting_dataset/labels'
output_image_dir = '../../data/aug_dataset/images'
output_label_dir = '../../data/aug_dataset/labels'

# Make sure both destination folders exist before anything is copied.
for target_dir in (output_image_dir, output_label_dir):
    os.makedirs(target_dir, exist_ok=True)

augment_images_and_labels(
    image_dir,
    label_dir,
    output_image_dir,
    output_label_dir,
)

In [32]:
def _copy_image_label_pairs(image_files, image_dir, label_dir, dest_image_dir, dest_label_dir):
    """Copy each image and its same-named .txt label into the destination folders."""
    for image_file in image_files:
        label_file = os.path.splitext(image_file)[0] + '.txt'
        shutil.copy(os.path.join(image_dir, image_file), os.path.join(dest_image_dir, image_file))
        shutil.copy(os.path.join(label_dir, label_file), os.path.join(dest_label_dir, label_file))


def split_dataset(image_dir, label_dir, train_image_dir, train_label_dir, val_image_dir, val_label_dir, val_split=0.2, seed=None):
    """Randomly split an image/label dataset into train and validation folders.

    Files are split per *source image*: ``name.jpg`` and all of its
    ``name_aug_<i>.jpg`` variants always land in the same subset. Splitting the
    files individually would put near-identical augmented copies of one image
    into both train and validation and inflate the validation metrics.

    Args:
        image_dir: folder with the ``.jpg`` images to split.
        label_dir: folder with the matching ``.txt`` label files.
        train_image_dir, train_label_dir: destination folders of the train subset.
        val_image_dir, val_label_dir: destination folders of the validation subset.
        val_split: fraction of source images (groups) assigned to validation;
            the group count is truncated with ``int``.
        seed: optional seed for a private random generator, making the split
            reproducible. ``None`` (default) shuffles with the global ``random``
            module state.

    Files are copied, never moved; destination folders are created if missing.
    """
    # Group files by source image: strip a trailing "_aug_<i>" from the stem.
    groups = {}
    for image_file in sorted(os.listdir(image_dir)):  # sorted: listdir order is arbitrary
        if not image_file.endswith('.jpg'):
            continue
        stem = os.path.splitext(image_file)[0]
        source_stem = re.sub(r'_aug_\d+$', '', stem)
        groups.setdefault(source_stem, []).append(image_file)

    group_names = list(groups)
    rng = random if seed is None else random.Random(seed)
    rng.shuffle(group_names)

    num_val = int(len(group_names) * val_split)
    val_files = [f for name in group_names[:num_val] for f in groups[name]]
    train_files = [f for name in group_names[num_val:] for f in groups[name]]

    for dest_dir in (train_image_dir, train_label_dir, val_image_dir, val_label_dir):
        os.makedirs(dest_dir, exist_ok=True)

    _copy_image_label_pairs(train_files, image_dir, label_dir, train_image_dir, train_label_dir)
    _copy_image_label_pairs(val_files, image_dir, label_dir, val_image_dir, val_label_dir)

# Augmented dataset (read) and the train/val folders it is split into.
image_dir = '../../data/aug_dataset/images'
label_dir = '../../data/aug_dataset/labels'
train_image_dir = '../../data/aug_dataset/train/images'
train_label_dir = '../../data/aug_dataset/train/labels'
val_image_dir = '../../data/aug_dataset/val/images'
val_label_dir = '../../data/aug_dataset/val/labels'

SPLIT_SEED = 42

for split_dir in (train_image_dir, train_label_dir, val_image_dir, val_label_dir):
    os.makedirs(split_dir, exist_ok=True)

# split_dataset shuffles with the global `random` generator. Seeding it makes
# the split reproducible, so re-running this cell (with an unchanged directory
# listing) copies the same split again instead of piling a different random
# split on top of the files already in train/ and val/.
random.seed(SPLIT_SEED)

split_dataset(image_dir, label_dir, train_image_dir, train_label_dir, val_image_dir, val_label_dir)

In [25]:
def draw_boxes(image_path, label_path, class_names):
    """Draw the YOLO boxes of a label file onto an image.

    Args:
        image_path: path of the image to load with ``cv2.imread``.
        label_path: path of the label file; each line is
            ``class_id center_x center_y width height`` with the four box
            values given as fractions of the image size.
        class_names: sequence mapping class ids to the text drawn above a box.

    Returns:
        The loaded image array with a rectangle and a caption per label line.

    Raises:
        OSError: if the image cannot be read.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising; fail with a clear message
        # rather than with an AttributeError on `.shape`.
        raise OSError(f"Could not read image: {image_path}")
    height, width = image.shape[:2]

    with open(label_path, 'r') as file:
        labels = file.readlines()

    color = (255, 0, 0)  # OpenCV colour order is BGR, so this is blue
    thickness = 2
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.9

    for label in labels:
        parts = label.strip().split()
        if len(parts) < 5:
            # Blank or truncated line (e.g. a trailing newline): nothing to draw.
            continue

        class_id = int(parts[0])
        center_x = float(parts[1]) * width
        center_y = float(parts[2]) * height
        box_width = float(parts[3]) * width
        box_height = float(parts[4]) * height

        # Centre/size -> corner coordinates in pixels.
        x1 = int(center_x - box_width / 2)
        y1 = int(center_y - box_height / 2)
        x2 = int(center_x + box_width / 2)
        y2 = int(center_y + box_height / 2)

        cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness)

        # Fall back to the numeric id when the label uses a class that
        # class_names does not cover.
        if 0 <= class_id < len(class_names):
            label_text = class_names[class_id]
        else:
            label_text = str(class_id)

        # Keep the caption inside the image for boxes that touch the top edge.
        (_, text_height), _ = cv2.getTextSize(label_text, font, font_scale, thickness)
        text_y = max(y1 - 10, text_height)
        cv2.putText(image, label_text, (x1, text_y), font, font_scale, color, thickness)

    return image

image_dir = '../../data/starting_dataset/images'
label_dir = '../../data/starting_dataset/labels'
# Deliberately NOT called `output_dir`: that name is the labels folder the
# label-generation cell writes to, and rebinding it here would silently
# redirect that cell to this folder if it were re-run afterwards.
visualization_dir = 'output'
class_names = ['defect']

os.makedirs(visualization_dir, exist_ok=True)

image_files = [f for f in os.listdir(image_dir) if f.endswith('.jpg')]

# Render every labelled image with its boxes drawn, for a visual sanity check.
for image_file in image_files:
    base_name = os.path.splitext(image_file)[0]
    image_path = os.path.join(image_dir, image_file)
    label_path = os.path.join(label_dir, base_name + '.txt')

    if not os.path.exists(label_path):
        print(f"Label file not found for {image_file}")
        continue

    image_with_boxes = draw_boxes(image_path, label_path, class_names)
    cv2.imwrite(os.path.join(visualization_dir, image_file), image_with_boxes)

print("Bounding boxes visualization completed.")

Bounding boxes visualization completed.
