# Find a Legend Challenge - Submission Notebook

Competition:  https://xeek.ai/challenges/extract-crossplot-markers <br>
Repository: https://github.com/REDA-solutions/AdvancedCV-competition

In [100]:
team_name = 'Team Barenstark' 
model_name = 'finalmodel'

## Imports

In [101]:
import os
import platform
import sys
import numpy as np
import pandas as pd
from glob import glob
import torch
import re

from models_ocr.preprocessing.preprocessor import Preprocessor # import of our class --> see repository
from models_ocr.pytesseract_model import PytesseractModel # import of our class --> see repository

## Description

![Model Architecture](misc/model_architecture.png)

## Submission inference pipeline

In [102]:
from time import perf_counter

In [103]:
TEST_DATA_ROOT = "raw_data/helvetios_challenge_dataset_test"
TEST_IMAGE_DATASET_PATH = f"{TEST_DATA_ROOT}/images"
TEST_LABELS_DATA_PATH = f"{TEST_DATA_ROOT}/labels"
TEST_INFERENCE_RESULTS_PATH = f"results"

In [104]:
def run_inference_pipeline(TEST_IMAGE_DATASET_PATH, TEST_INFERENCE_RESULTS_PATH):
   
   print(f"* OS                          : {platform.system()}, {platform.release()}")
   python_version = str(sys.version).replace('\n', ' ')
   print(f"* Python version              : {python_version}")

   os.makedirs(TEST_INFERENCE_RESULTS_PATH, exist_ok=True)
         
   ts_start = perf_counter()
   
   model = torch.hub.load('../yolov5/', 'custom', path='models_detection/best.pt', source='local')
   model.conf = 0.2
   preprocessor = Preprocessor(deskew=True)
   tesseract = PytesseractModel(preprocessor=preprocessor, confidence=15, custom_config=r"-l eng --psm 11")

   imgs = TEST_IMAGE_DATASET_PATH + r'/*.png'  
   imgs = list(glob(imgs))
   
   results = []
   sample_names = []

   for img in imgs:
      legends = model(img)
      legends = legends.crop()
      if len(legends) == 0:
         results.append(np.nan)
      else:
         predictions = []
         for legend_ in legends:
            legend = legend_['im']
            prediction = tesseract.predict(legend)
            predictions.extend(prediction)
         reg = re.compile('/[^0-9]/g')
         predictions = [s for s in predictions if not any(chr.isdigit() for chr in s)]
         predictions = [s for s in predictions if len(s)!=1 or not s.islower()]
         # predictions = [s for s in predictions if s.isalnum()]
         prediction_str = "["
         for word in predictions: prediction_str += f"'{word}' "
         prediction_str = prediction_str.strip()
         prediction_str += "]"
         if prediction_str == "[]":
            prediction_str = np.nan
         results.append(prediction_str)
      sample_names.append(img.split("\\")[1])
   
   ts_after_test = perf_counter()
   
   print(f"Inference time: {ts_after_test-ts_start:.2f} sec.")
   
   inference_results = {'sample_name': sample_names,
                        'legend': results}
   inference_results_df = pd.DataFrame(inference_results)
      
   inference_results_df.to_csv(f"{TEST_INFERENCE_RESULTS_PATH}/{team_name}_{model_name}_results.csv", index = False)
   
   print(inference_results_df)

   print(f"The submission file   : {TEST_INFERENCE_RESULTS_PATH}/{team_name}_{model_name}_results.csv")


In [105]:
run_inference_pipeline(TEST_IMAGE_DATASET_PATH, TEST_INFERENCE_RESULTS_PATH)

YOLOv5  v7.0-9-gf9ca365 Python-3.10.8 torch-1.13.0+cpu CPU



* OS                          : Windows, 10
* Python version              : 3.10.8 (tags/v3.10.8:aaaf517, Oct 11 2022, 16:50:30) [MSC v.1933 64 bit (AMD64)]


Fusing layers... 
Model summary: 157 layers, 7012822 parameters, 0 gradients, 15.8 GFLOPs
Adding AutoShape... 
Saved 1 image to [1mruns\detect\exp2848[0m
Saved results to runs\detect\exp2848

Saved 1 image to [1mruns\detect\exp2849[0m
Saved results to runs\detect\exp2849

Saved 1 image to [1mruns\detect\exp2850[0m
Saved results to runs\detect\exp2850

Saved 1 image to [1mruns\detect\exp2851[0m
Saved results to runs\detect\exp2851

Saved 1 image to [1mruns\detect\exp2852[0m
Saved results to runs\detect\exp2852

Saved 1 image to [1mruns\detect\exp2853[0m
Saved results to runs\detect\exp2853

Saved 1 image to [1mruns\detect\exp2854[0m
Saved results to runs\detect\exp2854

Saved 1 image to [1mruns\detect\exp2855[0m
Saved results to runs\detect\exp2855

Saved 1 image to [1mruns\detect\exp2856[0m
Saved results to runs\detect\exp2856

Saved 1 image to [1mruns\detect\exp2857[0m
Saved results to runs\detect\exp2857

Saved 1 image to [1mruns\detect\exp2858[0m
Saved results 

Inference time: 489.75 sec.
                  sample_name                                      legend
0    20220915195651573677.png       ['drt' 'qn' '©' 'deo' 'RE' '@' 'ber']
1    20220915195652176684.png                                 ['On' 'Os']
2    20220915195652713648.png                                ['Mobility']
3    20220915195653681519.png                                     ['exe']
4    20220915195654121931.png                                         NaN
..                        ...                                         ...
295  20220915195931225632.png                         ['area' 'ad' 'IDA']
296  20220915195931982187.png                               ['+' 'Canes']
297  20220915195932679590.png                                     ['ge.']
298  20220915195933359877.png         ['MeV' 'Output' '(bar/m)' '&' 'x”']
299  20220915195934085444.png  ['ME' '@' 'power' '@' 'Titanium' '©' 'ax']

[300 rows x 2 columns]
The submission file   : results/Team Barenstark_finalmodel_r