# Полный запуск решения

## Импорт библиотек

In [1]:
import io
import os
import pickle
import yaml
from typing import List, Dict
from pathlib import Path

import pandas as pd
import numpy as np
from PIL import Image
from sentence_transformers import SentenceTransformer
from loguru import logger
from tqdm import tqdm
from sklearn.neighbors import NearestNeighbors
import logging
import warnings

import Levenshtein
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
from ultralytics import YOLO
from sklearn.neighbors import NearestNeighbors
import cv2
from dataclasses import dataclass
import abc
from paddleocr import PaddleOCR
from sklearn.metrics import accuracy_score, pairwise_distances

config_path = 'config.yaml'

  from tqdm.autonotebook import tqdm, trange
2024-11-10 10:51:03.872586: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-10 10:51:03.886858: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1731225063.907022  873270 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1731225063.911912  873270 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-10 10:51:03.936230: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow b

In [2]:
warnings.filterwarnings("ignore")
logging.getLogger("ppocr").setLevel(logging.ERROR)


os.makedirs("weights/embed", exist_ok=True)
os.makedirs("test/imags", exist_ok=True)

## Для дальнейшей работы, нужно скачать веса, и положить в нужные папки
train_embed положить в weights/embeded  
https://drive.google.com/file/d/154jS1mS7ca43gm1eSu_DhzP7Y7f7eCHU/view?usp=sharing

segmentation.pt положить в weights/  
https://drive.google.com/file/d/1Rssq6iwe8ExxcSG7hnjz1UZiieUDkwVh/view?usp=sharing

best_det.pt положить в weights/  
https://drive.google.com/file/d/10sPV7AW10ugb_oIjbPES5bUWP3wDRn1J/view?usp=sharing

# Фотографии, информации о боксах, и текст для тейна должны лежать в папке train в папках imgs, labels, labels_with_text соотвотсвенно: train/imgs - фотографии, train/labels - информация о боксах, tarin/labels_with_text - текст 

# Также для работы необходим config.yaml

# Тестовые фотографи следует положить в папку test/imags !

## Получение Эмбедингов для тестовых фото

In [3]:
def load_images_from_folder(output_folder: str) -> List[np.ndarray]:
    
    frame_files = sorted(os.listdir(output_folder))
    frames = []
    for frame_file in frame_files:
        frame_path = os.path.join(output_folder, frame_file)
        if os.path.isfile(frame_path) and frame_file.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp')):
            try:
                img = Image.open(frame_path)
                frames.append(np.array(img))
            except Exception as e:
                logger.eror(f"Error opening {frame_path}: {e}")
        else:
            logger.eror(f"Skipping directory or non-image file: {frame_path}")
    
    return frames

def save_embeddings(embeddings, filename, output_folder):
    output_path = Path(output_folder) / f"{filename}.pkl"
    with open(output_path, 'wb') as f:
        pickle.dump(embeddings, f)
    logger.info(f"Saved embeddings to {output_path}")

def vectorize_images(images: List[np.ndarray], model: SentenceTransformer) -> List[np.ndarray]:
    return [model.encode(Image.fromarray(img)) for img in tqdm(images)]

In [4]:
with open(config_path, 'r') as file:
    config = yaml.safe_load(file)
logger.info("Loaded configuration from {}", config_path)

test_images = load_images_from_folder(config['test_images_folder'])
logger.info("Loaded test: {}", config['test_images_folder'])

model = SentenceTransformer(config['model_name'])
logger.info("Loaded model: {}", config['model_name'])

test_embeddings = vectorize_images(test_images, model)

save_embeddings(test_embeddings, 'test_emb', config['emb_output_folder'])

logger.info("Saved embeddings for test images.")


[32m2024-11-10 10:51:07.227[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m3[0m - [1mLoaded configuration from config.yaml[0m
[32m2024-11-10 10:51:09.790[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m6[0m - [1mLoaded test: test/imags[0m
[32m2024-11-10 10:51:11.137[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m9[0m - [1mLoaded model: clip-ViT-B-16[0m
 49%|████▊     | 55/113 [00:10<00:12,  4.62it/s]

# Сегментация

In [5]:
class Segmentation:
    def __init__(self, weights_yolo_seg_path: str):
        self.model_seg = YOLO(weights_yolo_seg_path)
        
    def get_segmentation(self) -> None:
        result = self.model_seg(self.image, conf=0.7)   
        if len(result[0]):
            object_masks = np.array(result[0].masks.xy, dtype=object)
            self.data["segment_points"] = object_masks 
        else:
            self.data["segment_points"] = []

    def make_filter_detect(self):
        image = np.array(self.image)
        mask = self.data["mask"]
        mask_bin = (mask > 0).astype(np.uint8) * 255
        contours, _ = cv2.findContours(mask_bin, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cv2.drawContours(image, contours, -1, (0, 0, 255), thickness=5)
        mask = np.repeat(mask[:, :, np.newaxis], 3, axis=2)
        mask = 0.5 * (mask > 0) + 0.5
        new_image = (image * mask).astype("int32")
        cv2.imwrite('output_image.jpg', new_image)

## Детекця

In [6]:
class Detection:
    def __init__(self, weights_yolo_det_path: str):
        self.model_det = YOLO(weights_yolo_det_path)
        
    def get_detection(self) -> None:
        result = self.model_det(self.image)
        if len(result[0]):
            object_box = np.array(result[0].boxes.xywhn.to("cpu").detach().numpy(), dtype=object)
            self.data["box_xywhn"] = ['\n'.join([f"{0} {x} {y} {w} {h}" for x, y, w, h in object_box])]
        else:
            self.data["box_xywhn"] = []

## OCR

In [7]:
class OCR(Segmentation, Detection):
    def __init__(self, 
                 weights_yolo_seg_path: str, 
                 weights_yolo_det_path: str,
                 image: Image.Image):
        Segmentation.__init__(self, weights_yolo_seg_path)
        Detection.__init__(self, weights_yolo_det_path)
        self.ocr = PaddleOCR(use_gpu=True, lang="en")  
        self.image = image
        self.data = {}
        self.get_segmentation()
        self.get_detection()
        self.crop_one_img()
        self.ocr_one_img()
        self.make_filter_detect()

    def get_mask(self) -> np.array:
        mask = np.zeros((self.image.size[1], self.image.size[0]), dtype=np.uint8)
        for object in self.data["segment_points"]:
            points = np.array(
                [[x, y] for x, y in object], dtype=np.int32
            )
            mask = cv2.fillPoly(mask, [points], color=255)
        self.data["mask"] = mask
        
        return mask
    
    def crop_one_img(self) -> None:
        mask = (np.array(self.get_mask()) > 0)
        mask = np.expand_dims(mask, axis=-1)
        image = self.image * mask
        if len(self.data["segment_points"]):
            x = np.array([x for obj in self.data["segment_points"] for x, y in obj])
            y = np.array([y for obj in self.data["segment_points"] for x, y in obj])
            x_min, x_max = int(min(x)), int(max(x))
            y_min, y_max = int(min(y)), int(max(y))
            self.data["crop_img"] = image[y_min:y_max, x_min:x_max, :]
        else:
            self.data["crop_img"] = image
    
    def ocr_one_img(self) -> None:
        crop_image = np.array(self.data["crop_img"])
        orig_image = np.array(self.image)

        # result = self.ocr.ocr(crop_image, rec=True)
        # if result[0]:
        #     self.data["rec_crop"] = [line[1][0] for line in result[0]]
        # else:
        #     self.data["rec_crop"] = ["None"]

        result = self.ocr.ocr(orig_image, rec=True)
        if result[0]:
            self.data["rec_orig"] = [line[1][0] for line in result[0]]
        else:
            self.data["rec_orig"] = ["None"]

    def get_text(self) -> Dict[str, List[str]]:
        dict_text = {
            "text_orig_img": self.data["rec_orig"],
            # "text_crop_img": self.data["rec_crop"],
        }
        return dict_text

In [8]:
@dataclass
class PredictResult:
    raw_text: str = None
    # image in bytes with boxes and text on it
    pred_img: str = None
    # unknow data from excel, None if search_in_data is False
    attribute1: str | None = None
    attribute2: str | None = None
    attribute3: str | None = None

class BaseModel(abc.ABC):

    @abc.abstractmethod
    def predict(
        self, image: Image.Image, search_in_data: bool, dist_threshold: float
    ) -> PredictResult:
        """Get predict from ML OCR Model

        Parameters
        ----------
        images : Image.Image
            List with images to be predicted
        search_in_data : bool
            Flag, if true, get missing data from excel file
        dist_threshold : float
            Distance threshold to cut out unknown images

        Returns
        -------
        PredictResult
            If search_in_data is True, returns full data from excel
            If False, return only OCR result
        """
        pass

In [9]:
class OcrBD():

    def __init__(self) -> None:
        self.model = SentenceTransformer("clip-ViT-B-16")
        self.emb_output_folder = "embeddings_vit"
        self.test_images_folder = "test/images"
        self.train_labels_folder = "train/labels"
        self.train_labels_with_text_folder = "train/labels_with_text"
        self.config_path = "config.yaml"
        with open(self.config_path, 'r') as file:
            self.config = yaml.safe_load(file)
        logger.info("Loaded configuration from {}", self.config_path)


    def load_embeddings_from_folder(self, folder: str) -> tuple[List[np.ndarray], List[np.ndarray]]:
        for filename in os.listdir(folder):
            emb_path = os.path.join(folder, filename)
    
            if os.path.isfile(emb_path):
                with open(emb_path, 'rb') as f:
                    embedding = pickle.load(f)
    
                    if 'test' in filename.lower():
                        test_embeddings = embedding
                    elif 'train' in filename.lower():
                        train_embeddings = embedding
    
        return train_embeddings
        
    def vectorize_img(self, image: Image.Image) -> np.ndarray:
        return [self.model.encode(image)]

    def load_image_filenames(self, images_folder: str) -> List[str]:
        image_filenames = []
        for filename in sorted(os.listdir(images_folder)):
            if filename.lower().endswith(('png', 'jpg', 'jpeg', 'bmp', 'gif', 'bbox', 'txt')): 
                image_filenames.append(filename)
        return image_filenames

    def find_nearest_neighbors(self,
                               test_embeddings: List[np.ndarray], 
                               train_embeddings: List[np.ndarray], 
                               n_neighbors: int, 
                               threshold: float) -> List[List[int]]:
        test_embeddings = np.array(test_embeddings)
        train_embeddings = np.array(train_embeddings)
        nn = NearestNeighbors(n_neighbors=n_neighbors, algorithm='ball_tree')
        nn.fit(train_embeddings)  
        
        neighbors_indices = []
        for test_emb in test_embeddings:
            distances, indices = nn.kneighbors([test_emb])  
            valid_indices = [idx for dist, idx in zip(distances[0], indices[0]) if dist < threshold]
            
            if valid_indices:
                neighbors_indices.append(valid_indices[0])
            else:
                neighbors_indices.append(None)
    
        return neighbors_indices 
        
    def load_labels(self, labels_folder: str, file_extension: str, train_filenames: List[str]) -> List[str]:
        labels = []
        train_filenames_base = [filename.split('.')[0] for filename in train_filenames]
        
        for filename in sorted(os.listdir(labels_folder)):
            if filename.split('.')[-1] == file_extension.lstrip('.') and filename.split('.')[0] in train_filenames_base:
                with open(os.path.join(labels_folder, filename), 'r') as file:
                    # Читаем строки и добавляем `\n`, если его нет
                    content = ''.join(line if line.endswith('\n') else line + '\n' for line in file.readlines())
                    labels.append(content)
                    
        return labels

    def predict(self, image: Image.Image, search_in_data: bool, dist_threshold: float) -> PredictResult:
        config = self.config
    
        train_embeddings = self.load_embeddings_from_folder(config['emb_output_folder'])
        test_embedings = self.vectorize_img(image)
        
        logger.info("Embeddings were read")
    
        test_filenames = self.load_image_filenames(config['test_images_folder'])
        train_filenames = self.load_image_filenames(config['train_images_folder'])
    
        train_labels = self.load_labels(config['train_labels_folder'], '.txt', train_filenames)
        train_labels_with_text = self.load_labels(config['train_labels_with_text_folder'], '.bbox', train_filenames)
        logger.info("train_labels and train_labels_with_text were read")
        
        logger.info("Test image filenames were read")
    
        n_neighbors = config['n_neighbors']
        threshold = config['threshold']
        nearest_neighbors = self.find_nearest_neighbors(test_embedings, train_embeddings, n_neighbors, threshold)
        logger.info(f"Neighbours were found - {nearest_neighbors}")
        results = []
        for test_idx, neighbors in enumerate(nearest_neighbors):
            if 1 == 1:
                neighbor_idx = neighbors 
                results.append([
                    test_idx, 
                    train_labels[neighbor_idx], 
                    train_labels_with_text[neighbor_idx],
                    train_filenames[neighbor_idx],
                ])
            else:
                results.append([
                    test_filenames[test_idx],  
                    None,  
                    None,
                    None
                ])

        df = pd.DataFrame(results, columns=['Test_Embedding', 'Label', 'Label_With_Text', 'Neighbour'])
        df["Label_With_Text"] = df["Label_With_Text"].map(lambda x: x[:-1])
        # df.to_excel(config['output_excel'], index=False)
        # logger.info("Saved results to Excel: {}", config['output_excel'])
        return df

In [10]:
def replace_words_by_similarity(label_text: str, text_list: List[str]) -> str:
    words = label_text.split()
    replaced_words = []
    for word in words:
        candidates = [text for text in text_list if text != "None" and len(text) == len(word)]
        if candidates:
            closest_match = min(candidates, key=lambda x: Levenshtein.distance(word, x))
            replaced_words.append(closest_match)
        else:
            replaced_words.append(word)
    return ' '.join(replaced_words)

In [11]:
class OcrPipeline(BaseModel):

    def __init__(self) -> None:
        self.weights_seg = "./weights/best.pt"
        self.weights_det = "./weights/best_det.pt"

    def predict(
        self, image: Image.Image, search_in_data: bool, dist_threshold: float
    ) -> PredictResult:
        ocr = OCR(self.weights_seg, self.weights_det, image)
        dict_text = ocr.get_text()
        # box_coord = ocr.data["box_xywhn"][0]
        model_neighbour = OcrBD()
        result = model_neighbour.predict(
            image, search_in_data=False, dist_threshold=10.5
        )

        neighbour_text = result["Label_With_Text"].iloc[0][1:-1]
        box_coord = result["Label"].iloc[0]
        new_label_text = replace_words_by_similarity(neighbour_text, dict_text["text_orig_img"])

        return [box_coord, new_label_text] 

## Инференс 

In [67]:
model = OcrPipeline()
ans = pd.DataFrame()
res_arr = []
for img_path in tqdm(os.listdir(config["test_images_folder"])):
    if img_path.split(".")[-1].lower() in ["jpg", "png", "jpeg"]:
        image = Image.open(os.path.join(config["test_images_folder"], img_path))
        result = [img_path]
        result += model.predict(image, search_in_data=False, dist_threshold=10.5)
        res_arr.append(result)

  0%|          | 0/113 [00:00<?, ?it/s]


0: 608x1088 (no detections), 427.6ms
Speed: 7.7ms preprocess, 427.6ms inference, 0.8ms postprocess per image at shape (1, 3, 608, 1088)

0: 384x640 1 0, 104.0ms
Speed: 3.6ms preprocess, 104.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)


[32m2024-11-10 10:50:23.621[0m | [1mINFO    [0m | [36m__main__[0m:[36m__init__[0m:[36m12[0m - [1mLoaded configuration from config.yaml[0m
[32m2024-11-10 10:50:23.800[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m81[0m - [1mEmbeddings were read[0m
[32m2024-11-10 10:50:23.813[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m88[0m - [1mtrain_labels and train_labels_with_text were read[0m
[32m2024-11-10 10:50:23.813[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m90[0m - [1mTest image filenames were read[0m
[32m2024-11-10 10:50:23.818[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m95[0m - [1mNeighbours were found - [171][0m
  1%|          | 1/113 [00:04<07:48,  4.19s/it]


0: 1088x832 2 2s, 635.0ms
Speed: 14.3ms preprocess, 635.0ms inference, 6.7ms postprocess per image at shape (1, 3, 1088, 832)

0: 640x480 1 0, 131.7ms
Speed: 2.0ms preprocess, 131.7ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 480)


[32m2024-11-10 10:50:27.557[0m | [1mINFO    [0m | [36m__main__[0m:[36m__init__[0m:[36m12[0m - [1mLoaded configuration from config.yaml[0m
[32m2024-11-10 10:50:27.741[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m81[0m - [1mEmbeddings were read[0m
[32m2024-11-10 10:50:27.751[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m88[0m - [1mtrain_labels and train_labels_with_text were read[0m
[32m2024-11-10 10:50:27.753[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m90[0m - [1mTest image filenames were read[0m
[32m2024-11-10 10:50:27.757[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m95[0m - [1mNeighbours were found - [65][0m
  2%|▏         | 2/113 [00:08<07:28,  4.04s/it]


0: 1088x832 1 2, 487.9ms
Speed: 3.9ms preprocess, 487.9ms inference, 3.0ms postprocess per image at shape (1, 3, 1088, 832)

0: 640x480 1 0, 111.0ms
Speed: 1.3ms preprocess, 111.0ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 480)


[32m2024-11-10 10:50:31.211[0m | [1mINFO    [0m | [36m__main__[0m:[36m__init__[0m:[36m12[0m - [1mLoaded configuration from config.yaml[0m
[32m2024-11-10 10:50:31.456[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m81[0m - [1mEmbeddings were read[0m
[32m2024-11-10 10:50:31.466[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m88[0m - [1mtrain_labels and train_labels_with_text were read[0m
[32m2024-11-10 10:50:31.467[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m90[0m - [1mTest image filenames were read[0m
[32m2024-11-10 10:50:31.475[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m95[0m - [1mNeighbours were found - [32][0m
  3%|▎         | 3/113 [00:11<07:11,  3.93s/it]


0: 1088x832 2 2s, 510.4ms
Speed: 8.2ms preprocess, 510.4ms inference, 4.8ms postprocess per image at shape (1, 3, 1088, 832)

0: 640x480 1 0, 123.9ms
Speed: 1.1ms preprocess, 123.9ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 480)


[32m2024-11-10 10:50:35.546[0m | [1mINFO    [0m | [36m__main__[0m:[36m__init__[0m:[36m12[0m - [1mLoaded configuration from config.yaml[0m
[32m2024-11-10 10:50:35.816[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m81[0m - [1mEmbeddings were read[0m
[32m2024-11-10 10:50:35.828[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m88[0m - [1mtrain_labels and train_labels_with_text were read[0m
[32m2024-11-10 10:50:35.829[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m90[0m - [1mTest image filenames were read[0m
[32m2024-11-10 10:50:35.837[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m95[0m - [1mNeighbours were found - [221][0m
  4%|▎         | 4/113 [00:16<07:24,  4.08s/it]


0: 832x1088 2 2s, 632.1ms
Speed: 10.7ms preprocess, 632.1ms inference, 5.6ms postprocess per image at shape (1, 3, 832, 1088)

0: 480x640 1 0, 137.8ms
Speed: 3.3ms preprocess, 137.8ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 640)


[32m2024-11-10 10:50:39.956[0m | [1mINFO    [0m | [36m__main__[0m:[36m__init__[0m:[36m12[0m - [1mLoaded configuration from config.yaml[0m
[32m2024-11-10 10:50:40.129[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m81[0m - [1mEmbeddings were read[0m
[32m2024-11-10 10:50:40.139[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m88[0m - [1mtrain_labels and train_labels_with_text were read[0m
[32m2024-11-10 10:50:40.139[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m90[0m - [1mTest image filenames were read[0m
[32m2024-11-10 10:50:40.144[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m95[0m - [1mNeighbours were found - [74][0m
  4%|▍         | 5/113 [00:20<07:28,  4.16s/it]


0: 1088x832 2 2s, 563.6ms
Speed: 5.8ms preprocess, 563.6ms inference, 7.6ms postprocess per image at shape (1, 3, 1088, 832)

0: 640x480 1 0, 100.2ms
Speed: 2.7ms preprocess, 100.2ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 480)


[32m2024-11-10 10:50:43.964[0m | [1mINFO    [0m | [36m__main__[0m:[36m__init__[0m:[36m12[0m - [1mLoaded configuration from config.yaml[0m
[32m2024-11-10 10:50:44.162[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m81[0m - [1mEmbeddings were read[0m
[32m2024-11-10 10:50:44.172[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m88[0m - [1mtrain_labels and train_labels_with_text were read[0m
[32m2024-11-10 10:50:44.172[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m90[0m - [1mTest image filenames were read[0m
[32m2024-11-10 10:50:44.178[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m95[0m - [1mNeighbours were found - [196][0m
  5%|▌         | 6/113 [00:24<07:19,  4.11s/it]


0: 832x1088 2 2s, 577.7ms
Speed: 13.5ms preprocess, 577.7ms inference, 7.2ms postprocess per image at shape (1, 3, 832, 1088)

0: 480x640 1 0, 145.6ms
Speed: 1.7ms preprocess, 145.6ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)


[32m2024-11-10 10:50:47.986[0m | [1mINFO    [0m | [36m__main__[0m:[36m__init__[0m:[36m12[0m - [1mLoaded configuration from config.yaml[0m
[32m2024-11-10 10:50:48.215[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m81[0m - [1mEmbeddings were read[0m
[32m2024-11-10 10:50:48.227[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m88[0m - [1mtrain_labels and train_labels_with_text were read[0m
[32m2024-11-10 10:50:48.227[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m90[0m - [1mTest image filenames were read[0m
[32m2024-11-10 10:50:48.234[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m95[0m - [1mNeighbours were found - [21][0m
  6%|▌         | 7/113 [00:28<07:14,  4.10s/it]


0: 1088x832 1 2, 601.0ms
Speed: 6.1ms preprocess, 601.0ms inference, 2.9ms postprocess per image at shape (1, 3, 1088, 832)

0: 640x480 1 0, 105.5ms
Speed: 2.5ms preprocess, 105.5ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 480)


[32m2024-11-10 10:50:52.507[0m | [1mINFO    [0m | [36m__main__[0m:[36m__init__[0m:[36m12[0m - [1mLoaded configuration from config.yaml[0m
[32m2024-11-10 10:50:52.737[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m81[0m - [1mEmbeddings were read[0m
[32m2024-11-10 10:50:52.752[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m88[0m - [1mtrain_labels and train_labels_with_text were read[0m
[32m2024-11-10 10:50:52.755[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m90[0m - [1mTest image filenames were read[0m
[32m2024-11-10 10:50:52.761[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m95[0m - [1mNeighbours were found - [38][0m
  7%|▋         | 8/113 [00:33<07:25,  4.25s/it]


0: 832x1088 2 2s, 504.9ms
Speed: 4.4ms preprocess, 504.9ms inference, 5.3ms postprocess per image at shape (1, 3, 832, 1088)

0: 480x640 1 0, 123.0ms
Speed: 1.5ms preprocess, 123.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


[32m2024-11-10 10:50:56.560[0m | [1mINFO    [0m | [36m__main__[0m:[36m__init__[0m:[36m12[0m - [1mLoaded configuration from config.yaml[0m
[32m2024-11-10 10:50:56.751[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m81[0m - [1mEmbeddings were read[0m
[32m2024-11-10 10:50:56.762[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m88[0m - [1mtrain_labels and train_labels_with_text were read[0m
[32m2024-11-10 10:50:56.762[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m90[0m - [1mTest image filenames were read[0m
[32m2024-11-10 10:50:56.767[0m | [1mINFO    [0m | [36m__main__[0m:[36mpredict[0m:[36m95[0m - [1mNeighbours were found - [74][0m
  8%|▊         | 9/113 [00:37<07:12,  4.16s/it]


0: 1088x832 1 2, 478.4ms
Speed: 6.7ms preprocess, 478.4ms inference, 4.1ms postprocess per image at shape (1, 3, 1088, 832)

0: 640x480 1 0, 160.7ms
Speed: 5.7ms preprocess, 160.7ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 480)


In [68]:
final_df = pd.DataFrame(res_arr, columns=["image_file", "label", "label_text"])
final_df["label_text"] = final_df["label_text"].map(lambda x: '"'+x+'"')

In [69]:
final_df.to_csv("submission.csv", index=False)