In [270]:
import os

import cv2
import numpy as np
from ultralytics import YOLO

import easyocr

import time

In [27]:
INPUT_PATH = r"..\..\..\train_atom\train\imgs"
OUTPUT_PATH = r"..\..\..\output_imgs"

YOLO_MODEL_PATH = r"..\models\yolov11.pt"
OCR_MODEL_PARAMS = {
    "lang_list": ['en', 'ru'],
    "gpu": True
}

In [30]:
YOLO_MODEL = YOLO(YOLO_MODEL_PATH)
OCR_MODEL = easyocr.Reader(**OCR_MODEL_PARAMS)

In [4]:
# получение путей до всех изображений
img_folder_path = r"..\..\..\train_atom\train\imgs"
list_of_img_folder_path = os.listdir(img_folder_path)

In [5]:
yolo_model = YOLO(r'..\models\yolov11.pt')

In [None]:
res = yolo_model.predict(os.path.join(img_folder_path, list_of_img_folder_path[0]))


image 1/1 c:\VUZ\ocr_metal_hack\Winstrike-marking\neural\experiments\..\..\..\train_atom\train\imgs\1.JPG: 640x480 2 markings, 69.6ms
Speed: 4.5ms preprocess, 69.6ms inference, 97.7ms postprocess per image at shape (1, 3, 640, 480)


In [254]:
# Функция для детекции и отрисовки рамок
def detect_and_draw_boxes(image_path : str, yolo_model, save_path):
    
    # Загружаем изображение
    image = cv2.imread(image_path)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Выполняем детекцию
    results = yolo_model(image_rgb)

    # Копируем изображение для отрисовки
    image_copy = image_rgb.copy()

    text_imgs = []

    i = 1

    # Проходимся по результатам детекции
    for result in results[0].boxes.data:
        x_min, y_min, x_max, y_max, confidence, class_id = result[:6].int().tolist()

        # сохраняем изображение с текстом
        text_imgs.append(image_copy[y_min:y_max, x_min:x_max])

        # Рисуем прямоугольник
        cv2.rectangle(image_copy, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)


    cv2.imwrite(os.path.join(save_path, image_path.split("\\")[-1]), image_copy)
    
    transformed_images = transform_images(text_imgs)

    text_result_dict = {}

    for key, value in transformed_images.items():
        text_result = []

        for detected_contour_text in ocr_detect(OCR_MODEL, text_imgs):
            for text_detect in detected_contour_text:
                text_result.append(text_detect[1]) 

        text_result_dict[key] = text_result


    return text_result_dict

In [236]:
def ocr_detect(ocr_model : easyocr.Reader, imgs_list):
    """  """
    results = []
    for img in imgs_list:
        results.append(ocr_model.readtext(img))
    
    return results

In [263]:
def transform_images(images_list : list):
    transformed_images = {
        "left" : [],
        "right" : [],
        "default" : [],
        "vertical" : []
    }

    for image in images_list:
        transformed_images["default"].append(image)

        # Поворот на 90 градусов влево с изменением размерности
        left_img = np.transpose(image.copy(), (1, 0, 2))
        left_img = cv2.flip(left_img, 0)
        transformed_images["left"].append(left_img)
        
        # Поворот на 90 градусов влево с изменением размерности
        right_img = np.transpose(image.copy(), (1, 0, 2))
        right_img = cv2.flip(right_img, 1)
        transformed_images["right"].append(right_img)

        # Горизонтальное отражение
        vertical_rgb = cv2.flip(image.copy(), 0)
        transformed_images["vertical"].append(vertical_rgb)

    return transformed_images

In [264]:
res = transform_images([cv2.imread(r"..\..\..\train_atom\train\imgs\423.JPG")])

In [277]:
start = time.time()
res = detect_and_draw_boxes(r"..\..\..\train_atom\train\imgs\410.JPG", YOLO_MODEL, OUTPUT_PATH)
res = detect_and_draw_boxes(r"..\..\..\train_atom\train\imgs\1.JPG", YOLO_MODEL, OUTPUT_PATH)
res = detect_and_draw_boxes(r"..\..\..\train_atom\train\imgs\423.JPG", YOLO_MODEL, OUTPUT_PATH)
res = detect_and_draw_boxes(r"..\..\..\train_atom\train\imgs\169.JPG", YOLO_MODEL, OUTPUT_PATH)
res = detect_and_draw_boxes(r"..\..\..\train_atom\train\imgs\348.JPG", YOLO_MODEL, OUTPUT_PATH)
print(time.time() - start)


0: 640x480 1 marking, 305.4ms
Speed: 3.0ms preprocess, 305.4ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 480)

0: 640x480 2 markings, 22.9ms
Speed: 2.0ms preprocess, 22.9ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 480)

0: 640x480 1 marking, 21.1ms
Speed: 2.0ms preprocess, 21.1ms inference, 5.3ms postprocess per image at shape (1, 3, 640, 480)

0: 640x480 1 marking, 20.5ms
Speed: 1.5ms preprocess, 20.5ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 480)

0: 480x640 1 marking, 20.9ms
Speed: 3.1ms preprocess, 20.9ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)
2.208617925643921


In [278]:
res

{'left': ['7', 'а8', '8', '9'],
 'right': ['7', 'а8', '8', '9'],
 'default': ['7', 'а8', '8', '9'],
 'vertical': ['7', 'а8', '8', '9']}

In [267]:
res = ocr_detect(OCR_MODEL, [cv2.imread(r"..\..\..\train_atom\train\imgs\1.JPG")])

In [24]:
# СТРАТ ОТРИСОВКИ
for img_path in list_of_img_folder_path:
    try:
        detect_and_draw_boxes(os.path.join(img_folder_path, img_path), yolo_model, r"..\..\..\train_atom\yolo_save_imgs")
    except:
        print(f"[ERROR] {img_path}")


0: 640x480 2 markings, 158.3ms
Speed: 3.6ms preprocess, 158.3ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 480)
[[[ 33  38  31]
  [ 29  34  27]
  [ 41  43  38]
  ...
  [ 61  62  54]
  [ 70  73  66]
  [ 96  99  92]]

 [[ 43  48  41]
  [ 51  56  49]
  [ 60  63  56]
  ...
  [ 60  61  53]
  [ 76  79  72]
  [ 87  90  83]]

 [[110 115 108]
  [139 144 137]
  [147 148 142]
  ...
  [ 77  78  70]
  [ 87  90  83]
  [ 76  79  72]]

 ...

 [[159 161 156]
  [199 201 196]
  [193 196 189]
  ...
  [ 33  33  25]
  [ 30  30  22]
  [ 26  26  18]]

 [[158 160 155]
  [189 191 186]
  [205 208 201]
  ...
  [ 41  41  33]
  [ 35  35  27]
  [ 29  29  21]]

 [[159 161 156]
  [167 169 164]
  [184 187 180]
  ...
  [ 49  49  41]
  [ 50  51  43]
  [ 49  50  42]]]
[[[175 180 173]
  [176 181 174]
  [180 185 178]
  ...
  [ 45  48  39]
  [ 54  57  48]
  [ 49  54  47]]

 [[170 175 168]
  [174 179 172]
  [172 177 170]
  ...
  [ 46  49  40]
  [ 47  50  41]
  [ 53  58  51]]

 [[164 169 162]
  [178 183 176]
