In [None]:
import os
from src.tools.image_slicer import create_image_slices
# Pass every entry of the dataset folder to create_image_slices, requesting
# slice_size=512 with overlap_percentage=10 and destination_folder="slices".
images_folder = "dataset/0.0.1/АФС для обработки ИИ"
image_paths = list(
    map(lambda image_name: f"{images_folder}/{image_name}", os.listdir(images_folder))
)
slicing_options = {
    "overlap_percentage": 10,
    "destination_folder": "slices",
    "slice_size": 512,
}
for image_path in image_paths:
    create_image_slices(image_path=image_path, **slicing_options)

In [26]:
import torch
from ultralytics import YOLO
import os
import time
import sys
# Evict every cached `src.*` module so the imports below re-execute the
# project code instead of reusing the copies already held by the kernel.
for module_name in [name for name in sys.modules if name.startswith('src.')]:
    sys.modules.pop(module_name)
from src.config import MODEL_NAME, OVERLAPPING_PERCENTAGE, SLICE_SIZE, SLICES_FOLDER, PREDICT_FOLDER_PREFIX, CONFIDENCE_THRESHOLD, IOU_FOLDER_PREFIX, OUTLIER_FILTER_FOLDER_PREFIX, OUTLIER_THRESHOLD_K, IOU_THRESHOLD
# Echo the imported configuration values so the cell output records the
# settings in effect for this run (OVERLAPPING_PERCENTAGE is not printed).
print(MODEL_NAME, SLICE_SIZE, SLICES_FOLDER, PREDICT_FOLDER_PREFIX, CONFIDENCE_THRESHOLD, IOU_FOLDER_PREFIX, OUTLIER_FILTER_FOLDER_PREFIX, OUTLIER_THRESHOLD_K, IOU_THRESHOLD)
from src.model import get_devices, get_models, process_folders, process_folder, read_annotation_file, create_annotation_file
from src.tools.utils import get_destination_folder, create_destination_folder, get_slice_coordinates
from src.tools.image_slicer import create_image_slices

best_4 640 slices predicted_images_with_annotations 0.4 iou_filtered outlier_filtered 3 0.5


In [19]:

def get_devices():
    """Return the torch device strings that inference can run on.

    Prints a short CUDA diagnostic along the way.

    Returns:
        list[str]: ``['cuda:0', 'cuda:1', ...]`` (one entry per visible GPU)
        when CUDA is available, otherwise ``['cpu']``.

    NOTE(review): a function with the same name is also imported from
    ``src.model`` in another cell; whichever cell ran last wins.
    """
    cuda_available = torch.cuda.is_available()
    print(f"CUDA доступна: {cuda_available}")
    if not cuda_available:
        print("CUDA недоступна, используется CPU")
        return ['cpu']

    print(f"Название GPU: {torch.cuda.get_device_name(0)}")
    print(f"Количество GPU: {torch.cuda.device_count()}")
    print(f"Текущий GPU device: {torch.cuda.current_device()}")
    devices = [f'cuda:{index}' for index in range(torch.cuda.device_count())]
    print(f"Доступные устройства: {devices}")
    return devices

def get_models(model_name, devices):
    """Load one YOLO model per device.

    Args:
        model_name: Weights name without extension; ``f'{model_name}.pt'`` is
            passed to ``YOLO``.
        devices: Iterable of device strings (e.g. the result of ``get_devices``).

    Returns:
        list: One ``YOLO`` instance per entry of ``devices``, in the same
        order, each moved to its device with ``.to(device)``.
    """
    loaded_models = []
    for target_device in devices:
        network = YOLO(f'{model_name}.pt')
        loaded_models.append(network)
        network.to(target_device)
        print(f"Модель загружена на: {target_device}")
    return loaded_models

def get_destination_folder(output_folder_prefix, model_name, confidence_threshold):
    """Build the output folder name ``<prefix>-<model>-<confidence>``.

    NOTE(review): other cells call a same-named function imported from
    ``src.tools.utils`` with a single list argument; this three-argument
    definition is not compatible with those calls.
    """
    parts = (output_folder_prefix, model_name, confidence_threshold)
    return "{}-{}-{}".format(*parts)

def create_destination_folder(output_folder_prefix, model_name, confidence_threshold):
    """Ensure the output folder for this prefix/model/threshold exists.

    The folder name comes from ``get_destination_folder``; the folder is
    created only when nothing exists at that path yet.

    Returns:
        str: The folder name.
    """
    destination = get_destination_folder(output_folder_prefix, model_name, confidence_threshold)
    if os.path.exists(destination):
        return destination
    os.makedirs(destination)
    return destination

def copyfile(source_path, destination_path):
    """Copy the bytes of ``source_path`` to ``destination_path``.

    Missing parent directories of the destination are created first; an
    existing destination file is overwritten.

    Args:
        source_path: Path of the file to read.
        destination_path: Path of the file to write.

    Raises:
        OSError: If the source cannot be read or the destination cannot be
            written.
    """
    import shutil  # local import: the notebook's import cell does not load shutil

    parent_folder = os.path.dirname(destination_path)
    # A bare file name has an empty dirname and os.makedirs('') raises
    # FileNotFoundError, so only create the parent when there is one.
    if parent_folder:
        os.makedirs(parent_folder, exist_ok=True)
    with open(source_path, 'rb') as src, open(destination_path, 'wb') as dst:
        # Stream in chunks instead of loading the whole file into memory.
        shutil.copyfileobj(src, dst)
        
def copyfolder(source_folder, destination_folder):
    """Recursively copy the contents of ``source_folder`` into ``destination_folder``.

    The destination folder is created when it does not exist. Sub-folders are
    handled by recursion and every other entry is copied with ``copyfile``.
    """
    if not os.path.exists(destination_folder):
        os.makedirs(destination_folder, exist_ok=True)
    for entry_name in os.listdir(source_folder):
        entry_source = os.path.join(source_folder, entry_name)
        entry_destination = os.path.join(destination_folder, entry_name)
        # Pick the copier by entry type: directories recurse, files are copied.
        copier = copyfolder if os.path.isdir(entry_source) else copyfile
        copier(entry_source, entry_destination)

def create_classes_file(output_folder):
    """Write ``classes.txt`` with the single class name ``saiga`` into ``output_folder``.

    An existing ``classes.txt`` is overwritten.
    """
    classes_path = f"{output_folder}/classes.txt"
    with open(classes_path, "w") as handle:
        print("saiga", file=handle)

def create_annotation_file(image_path, boxes, output_folder, image_size=512):
    """Write a label file for one image: ``0 x_center y_center width height`` per box.

    The label file is named after the image (extension replaced by ``.txt``)
    and placed in ``output_folder``; an existing file is overwritten. A
    ``classes.txt`` is created in ``output_folder`` if it is missing.

    Args:
        image_path: Path of the image the boxes belong to; only its base name
            is used.
        boxes: Iterable of ``[x1, y1, x2, y2]`` boxes.
        output_folder: Existing folder that receives the label file.
        image_size: Value every coordinate is divided by. The same value is
            used for both axes, so it is only correct for square images of
            that side length.
    """
    if not os.path.exists(f"{output_folder}/classes.txt"):
        create_classes_file(output_folder)

    # splitext keeps dots that are part of the name ("tile.v2.png" -> "tile.v2"),
    # so the label file always matches its image.
    filename = os.path.splitext(os.path.basename(image_path))[0]
    with open(f"{output_folder}/{filename}.txt", "w") as f:  # "w", not "a": rewrite the labels on every run
        for x1, y1, x2, y2 in boxes:
            x_center = (x1 + x2) / 2 / image_size
            y_center = (y1 + y2) / 2 / image_size
            width = (x2 - x1) / image_size
            height = (y2 - y1) / image_size
            f.write(f"0 {x_center} {y_center} {width} {height}\n")

def process_images(model, images_folder, output_folder, device, confidence_threshold):
    """Run detection on every image in each sub-folder of ``images_folder``.

    For every image with at least one detection, the image is copied to
    ``output_folder/<sub-folder>/`` and a label file is written next to it
    with ``create_annotation_file``. Images without detections are skipped.

    Args:
        model: Object with a ``predict(path, conf=..., device=..., verbose=...)``
            method that returns a sequence whose first item exposes
            ``boxes.conf``, ``boxes.xyxy`` and ``boxes.xyxyn``.
        images_folder: Folder whose sub-folders contain the images.
        output_folder: Root folder for copied images and label files.
        device: Device string forwarded to ``model.predict``.
        confidence_threshold: Minimum confidence forwarded to ``model.predict``.

    Returns:
        list[dict]: One ``{'source_image_path', 'coordinates'}`` entry per
        image with detections; ``coordinates`` holds pixel ``xyxy`` boxes.
    """
    print(f"Обработка изображений на устройстве: {device}")
    start_time = time.time()
    processed_count = 0
    total_detections = 0
    boxes_list = []
    # Sorted listings keep the processing order, and the returned list, reproducible.
    for folder in sorted(os.listdir(images_folder)):
        folder_path = f"{images_folder}/{folder}"
        if not os.path.isdir(folder_path):
            continue
        destination_folder = f"{output_folder}/{folder}"
        for image_name in sorted(os.listdir(folder_path)):
            full_image_path = f"{folder_path}/{image_name}"
            result = model.predict(full_image_path, conf=confidence_threshold, device=device, verbose=False)[0]
            detection_count = len(result.boxes.conf)
            if detection_count == 0:
                continue

            boxes_list.append({'source_image_path': full_image_path, 'coordinates': result.boxes.xyxy.tolist()})
            os.makedirs(destination_folder, exist_ok=True)
            copyfile(full_image_path, f"{destination_folder}/{image_name}")
            # Labels must be normalised by the real image size, which is not
            # necessarily the 512 default of create_annotation_file. xyxyn is
            # already normalised per axis, so a divisor of 1 keeps the values
            # correct for any image size.
            create_annotation_file(full_image_path, result.boxes.xyxyn.tolist(), destination_folder, image_size=1)
            processed_count += 1
            total_detections += detection_count
            if processed_count % 10 == 0:
                print(f"Обработано: {processed_count} изображений с детекциями")

    end_time = time.time()
    print(f"\nГотово! Обработано {processed_count} изображений с детекциями")
    print(f"Общее количество детекций: {total_detections}")
    print(f"Время обработки: {end_time - start_time:.2f} секунд")
    print(f"Устройство: {device}")
    return boxes_list

In [28]:
# Load one model per available device, then run detection on one sliced scene.
devices = get_devices()
models = get_models(MODEL_NAME, devices)
# Use the second device when there is one; on a single-GPU or CPU-only machine
# index 1 would raise IndexError, so fall back to index 0.
device_index = min(1, len(devices) - 1)
folder_name = get_destination_folder([PREDICT_FOLDER_PREFIX, MODEL_NAME, CONFIDENCE_THRESHOLD, "3"])
output_folder = create_destination_folder(folder_name)
total_detections, boxes_list = process_folder(f"{SLICES_FOLDER}/2025_08_05_PhotoRieboR4_g201b201078_f003_094", models[device_index], devices[device_index], output_folder)

CUDA доступна: True
Название GPU: NVIDIA GeForce RTX 5090
Количество GPU: 2
Текущий GPU device: 0
Доступные устройства: ['cuda:0', 'cuda:1']
Модель загружена на: cuda:0
Модель загружена на: cuda:1


In [29]:
# Display the detections returned by process_folder in the previous cell.
boxes_list

[{'source_image_path': 'slices/2025_08_05_PhotoRieboR4_g201b201078_f003_094/2025_08_05_PhotoRieboR4_g201b201078_f003_094_slice_000_005_2560_0.png',
  'coordinates': [[526.4727172851562,
    136.67703247070312,
    539.4025268554688,
    164.66281127929688],
   [317.580322265625, 495.9107666015625, 353.124267578125, 526.494384765625]]},
 {'source_image_path': 'slices/2025_08_05_PhotoRieboR4_g201b201078_f003_094/2025_08_05_PhotoRieboR4_g201b201078_f003_094_slice_000_006_3072_0.png',
  'coordinates': [[405.50146484375,
    168.5924835205078,
    443.0821533203125,
    206.41612243652344],
   [182.5170135498047, 468.04345703125, 220.55918884277344, 503.6771240234375],
   [14.467101097106934,
    136.69845581054688,
    27.39752197265625,
    164.64303588867188],
   [237.00765991210938,
    242.59219360351562,
    274.3644714355469,
    269.6678771972656],
   [192.36326599121094, 52.024169921875, 223.0604705810547, 103.9688720703125],
   [274.22943115234375,
    327.9288330078125,
    292.2

In [None]:
# Purge the cached module BEFORE importing so this import re-executes
# `src.tools.utils`. Purging after the import would leave the name bound to the
# stale copy until the cell is run a second time.
sys.modules.pop('src.tools.utils', None)
from src.tools.utils import get_destination_folder

predictions_destination_folder = get_destination_folder([PREDICT_FOLDER_PREFIX, MODEL_NAME, CONFIDENCE_THRESHOLD])
predictions_destination_folder

'predicted_images_with_annotations-best_4-0.4'

: 

In [None]:
# Copy every entry of the predictions folder into a second folder whose name
# additionally carries OUTLIER_THRESHOLD_K.
# NOTE(review): the destination name is built from PREDICT_FOLDER_PREFIX, while
# OUTLIER_FILTER_FOLDER_PREFIX is imported but never used — confirm which
# prefix is intended.
outliers_destination_folder = get_destination_folder(
    [PREDICT_FOLDER_PREFIX, MODEL_NAME, CONFIDENCE_THRESHOLD, OUTLIER_THRESHOLD_K]
)
folders = os.listdir(predictions_destination_folder)
for folder in folders:
    source_subfolder = f"{predictions_destination_folder}/{folder}"
    destination_subfolder = f"{outliers_destination_folder}/{folder}"
    copyfolder(source_subfolder, destination_subfolder)

In [None]:
# NOTE: boxes_list only exists after the process_folder cell has been run in
# the current kernel session; on a fresh kernel this cell raises NameError.
boxes_list

NameError: name 'boxes_list' is not defined

In [None]:
# Shift every box by the offsets encoded in its slice file name.
# The name is expected to end in `_<x_min>_<y_min>.<ext>`, e.g. `..._2560_0.png`.
print("boxes_list", len(boxes_list))
for boxes in boxes_list:
    # The shift mutates boxes_list in place, so re-running this cell would add
    # the offsets a second time. Mark shifted entries and skip them.
    if boxes.get('offset_applied'):
        continue
    name_tokens = boxes['source_image_path'].split('/')[-1].split('_')
    x_min = int(name_tokens[-2].split('.')[0])
    y_min = int(name_tokens[-1].split('.')[0])

    for box in boxes['coordinates']:
        box[0] = box[0] + x_min
        box[1] = box[1] + y_min
        box[2] = box[2] + x_min
        box[3] = box[3] + y_min
    boxes['offset_applied'] = True

boxes_list

In [None]:
# Imports were already executed in an earlier cell.
def calculate_iou(box1, box2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Args:
        box1: Box as ``[x1, y1, x2, y2]``.
        box2: Box as ``[x1, y1, x2, y2]``.

    Returns:
        float: Intersection area divided by union area. ``0.0`` when the
        boxes do not overlap or when the union area is zero.
    """
    x_left = max(box1[0], box2[0])
    y_top = max(box1[1], box2[1])
    x_right = min(box1[2], box2[2])
    y_bottom = min(box1[3], box2[3])

    # No overlap along at least one axis.
    if x_right < x_left or y_bottom < y_top:
        return 0.0

    intersection_area = (x_right - x_left) * (y_bottom - y_top)
    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union_area = box1_area + box2_area - intersection_area

    # Two zero-area boxes: avoid dividing by zero.
    if union_area == 0:
        return 0.0

    return intersection_area / union_area


# Flatten the per-slice detections into one list so that boxes coming from
# different slices can be compared with each other.
all_boxes = []
for box_group in boxes_list:
    source_path = box_group['source_image_path']
    for coord in box_group['coordinates']:
        all_boxes.append({
            'coordinates': coord,
            'source': source_path
        })

with_duplicates = len(all_boxes)
print(f"Всего боксов для сравнения: {with_duplicates}")

# Greedy de-duplication: a box is a duplicate when it overlaps an earlier box
# that was kept. Counting overlapping pairs instead over-counts clusters
# (three mutually overlapping boxes form 3 pairs but only 2 duplicates) and
# can push the "without duplicates" figure below zero.
duplicate_indices = set()
for i in range(len(all_boxes)):
    if i in duplicate_indices:
        continue  # a dropped box must not suppress other boxes
    for j in range(i + 1, len(all_boxes)):
        if j in duplicate_indices:
            continue
        iou = calculate_iou(all_boxes[i]['coordinates'], all_boxes[j]['coordinates'])
        if iou > IOU_THRESHOLD:
            print(f"Боксы с IoU > {IOU_THRESHOLD}:")
            print(f"Бокс 1: {all_boxes[i]['coordinates']} (из {all_boxes[i]['source']})")
            print(f"Бокс 2: {all_boxes[j]['coordinates']} (из {all_boxes[j]['source']})")
            print(f"IoU: {iou:.4f}")
            print("---")
            duplicate_indices.add(j)

count = len(duplicate_indices)
print(f"Количество боксов с IoU > {IOU_THRESHOLD}: {count}")
without_duplicates = len(all_boxes) - count
print(f"Количество боксов без дубликатов: {without_duplicates}")

In [None]:
# Per-image outlier filtering: keep the boxes whose area lies inside
# [Q1 - k*IQR, Q3 + k*IQR], with k = OUTLIER_THRESHOLD_K.
import numpy as np
print("boxes_list", len(boxes_list))

filtered_boxes_from_outliers = []
for boxes in boxes_list:
    areas = [(box[2] - box[0]) * (box[3] - box[1]) for box in boxes['coordinates']]
    if not areas:
        # No boxes: the mean and the percentiles are undefined, nothing to keep.
        continue
    mean_of_areas = sum(areas) / len(areas)

    q1 = np.percentile(areas, 25)
    q3 = np.percentile(areas, 75)
    iqr = q3 - q1
    lower_bound = q1 - OUTLIER_THRESHOLD_K * iqr
    upper_bound = q3 + OUTLIER_THRESHOLD_K * iqr
    # The bounds are inclusive. With a single box (or equal areas) IQR is 0 and
    # both bounds equal the area itself, so strict comparisons would discard
    # every box of the image.
    filtered_coordinates_from_outliers = [
        box
        for box, box_area in zip(boxes['coordinates'], areas)
        if lower_bound <= box_area <= upper_bound
    ]

    print("sum of areas", sum(areas))
    print("mean of areas", mean_of_areas)
    print("lower_bound", lower_bound)
    print("upper_bound", upper_bound)
    print("iqr", iqr)
    print("amount of filtered_coordinates_from_outliers", len(filtered_coordinates_from_outliers))
    if len(filtered_coordinates_from_outliers) > 0:
        filtered_boxes_from_outliers.append({'source_image_path': boxes['source_image_path'], 'coordinates': filtered_coordinates_from_outliers})

print("amount of filtered_boxes_from_outliers", len(filtered_boxes_from_outliers))
