## OCR - SVHN dataset

## Installing dependencies

In [None]:
!apt-get install tesseract-ocr libtesseract-dev tesseract-ocr-por
!apt-get install poppler-utils
!pip install pdf2image pytesseract
!pip install fitz
!pip install PyMuPDF
!pip install jiwer
!pip install textbsr
!pip install torch>=1.8.1 torchvision>=0.9 cnstd==1.2
!pip install numpy==1.23
!pip install datasets

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
libtesseract-dev is already the newest version (4.1.1-2.1build1).
tesseract-ocr is already the newest version (4.1.1-2.1build1).
tesseract-ocr-por is already the newest version (1:4.00~git30-7274cfa-1.1).
0 upgraded, 0 newly installed, 0 to remove and 45 not upgraded.
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
poppler-utils is already the newest version (22.02.0-2ubuntu0.4).
0 upgraded, 0 newly installed, 0 to remove and 45 not upgraded.


In [None]:
# Clone the EasyOCR sources and install the easyocr and jiwer packages with pip.
# NOTE(review): jiwer is already installed by the previous cell, and nothing
# visible in this notebook reads the cloned EasyOCR/ directory - confirm
# whether the clone and the second jiwer install are still needed.
!git clone https://github.com/JaidedAI/EasyOCR.git
!pip install easyocr
!pip install jiwer

Cloning into 'EasyOCR'...
remote: Enumerating objects: 2736, done.[K
remote: Total 2736 (delta 0), reused 0 (delta 0), pack-reused 2736[K
Receiving objects: 100% (2736/2736), 157.83 MiB | 13.90 MiB/s, done.
Resolving deltas: 100% (1664/1664), done.
Updating files: 100% (313/313), done.


## Load libraries


In [None]:
import pytesseract
import pandas as pd
import numpy as np
import cv2
import matplotlib.pyplot as plt
import os
from datasets import load_dataset
import easyocr
from skimage import util, img_as_ubyte

from pdf2image import convert_from_path, convert_from_bytes


In [None]:
# Mount Google Drive at /content/drive; a later cell copies the results CSV
# into a folder under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)


Mounted at /content/drive


In [None]:
# Load the SVHN "full_numbers" configuration from the Hugging Face Hub.
# trust_remote_code=True: the repository ships a loader script, and without
# the flag load_dataset stops and waits for an interactive [y/N] answer,
# which blocks an unattended "Run All".
dataset_svhn = load_dataset("ufldl-stanford/svhn", 'full_numbers', trust_remote_code=True)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading builder script:   0%|          | 0.00/8.28k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/10.1k [00:00<?, ?B/s]

The repository for ufldl-stanford/svhn contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/ufldl-stanford/svhn.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N] y


Downloading data:   0%|          | 0.00/404M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/277M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.96G [00:00<?, ?B/s]

Generating train split:   0%|          | 0/33402 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/13068 [00:00<?, ? examples/s]

Generating extra split:   0%|          | 0/202353 [00:00<?, ? examples/s]

In [None]:
# Show the available splits with their features and row counts.
dataset_svhn

DatasetDict({
    train: Dataset({
        features: ['image', 'digits'],
        num_rows: 33402
    })
    test: Dataset({
        features: ['image', 'digits'],
        num_rows: 13068
    })
    extra: Dataset({
        features: ['image', 'digits'],
        num_rows: 202353
    })
})

In [None]:
# Build one [image, label, source] record per example of the test split.
svhn_data = []

# Iterate over the split directly so each example is fetched once; indexing
# dataset_svhn['test'][i] separately for the image and for the digits loads
# the same row twice.
for example in dataset_svhn['test']:
    image = example['image']
    # The per-digit labels are concatenated into a single string, e.g. [3, 1] -> '31'.
    label = ''.join(map(str, example['digits']['label']))

    svhn_data.append([image, label, 'SVHN'])


In [None]:
# One row per test image: the image object, its digit string and the dataset tag.
# Passing the column names to the constructor also works when svhn_data is
# empty, where assigning .columns afterwards raises a length-mismatch error.
svhn_df = pd.DataFrame(svhn_data, columns=['image', 'label', 'type'])
svhn_df

In [None]:
import time

def get_image_gray(image):
    """Convert an image to grayscale and measure how long the conversion takes.

    Args:
        image: Image array passed to ``cv2.cvtColor`` with the
            ``cv2.COLOR_BGR2GRAY`` conversion code.

    Returns:
        tuple: ``(gray_image, elapsed_seconds)``.
    """
    # perf_counter is the high-resolution clock intended for measuring short
    # durations; time.time() is a wall clock and can jump or lack resolution
    # for operations that take only microseconds.
    inicio = time.perf_counter()
    roi_image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    tempo_decorrido = time.perf_counter() - inicio

    return roi_image_gray, tempo_decorrido

def get_image_image_trans(image):
    """Apply an inverted binary threshold with Otsu's flag and time the call.

    Args:
        image: Image array passed to ``cv2.threshold`` with
            ``cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV`` (threshold argument 0,
            maximum value 255).

    Returns:
        tuple: ``(thresholded_image, elapsed_seconds)``. The threshold value
        returned by OpenCV is discarded.
    """
    # perf_counter is the high-resolution clock intended for short durations.
    inicio = time.perf_counter()
    _, roi_image_trans = cv2.threshold(image, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)
    tempo_decorrido = time.perf_counter() - inicio

    return roi_image_trans, tempo_decorrido

def get_image_binary(image):
    """Apply a fixed binary threshold (127, max 255) and time the call.

    Args:
        image: Image array passed to ``cv2.threshold`` with
            ``cv2.THRESH_BINARY``.

    Returns:
        tuple: ``(binary_image, elapsed_seconds)``.
    """
    # perf_counter is the high-resolution clock intended for short durations.
    inicio = time.perf_counter()
    _, roi_image_binary = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)
    tempo_decorrido = time.perf_counter() - inicio

    return roi_image_binary, tempo_decorrido

def get_image_trans_bilateral(image):
    """Apply ``cv2.bilateralFilter(image, 9, 75, 75)`` and time the call.

    Args:
        image: Image array to filter.

    Returns:
        tuple: ``(filtered_image, elapsed_seconds)``.
    """
    # perf_counter is the high-resolution clock intended for short durations.
    inicio = time.perf_counter()
    roi_image_trans_bilateral = cv2.bilateralFilter(image, 9, 75, 75)
    tempo_decorrido = time.perf_counter() - inicio

    return roi_image_trans_bilateral, tempo_decorrido


def get_pytesseract_image_to_string(image):
    """Run Tesseract (``lang='eng'``) on an image and time the recognition.

    Args:
        image: Image passed unchanged to ``pytesseract.image_to_string``.

    Returns:
        tuple: ``(text, elapsed_seconds)`` where ``text`` is the raw string
        returned by pytesseract.
    """
    # perf_counter is the high-resolution clock intended for timing code.
    inicio = time.perf_counter()
    text = pytesseract.image_to_string(image, lang='eng')
    tempo_decorrido = time.perf_counter() - inicio

    return text, tempo_decorrido


def get_easyocr_readtext(reader, image):
    """Run ``reader.readtext(image, detail=0)`` and time the recognition.

    Args:
        reader: Object exposing ``readtext(image, detail=...)`` (the notebook
            passes an ``easyocr.Reader``).
        image: Image passed unchanged to ``reader.readtext``.

    Returns:
        tuple: ``(prediction, elapsed_seconds)`` where ``prediction`` is
        whatever ``readtext`` returned.
    """
    # perf_counter is the high-resolution clock intended for timing code.
    inicio = time.perf_counter()
    prediction = reader.readtext(image, detail=0)
    tempo_decorrido = time.perf_counter() - inicio

    return prediction, tempo_decorrido


In [None]:
from PIL import Image
import easyocr
from jiwer import wer, cer, wil, wip, mer

def get_cv2_pytesseract_extraction(docs_test_df):
    """Run Tesseract on every image of ``docs_test_df``, raw and after each OpenCV filter.

    Args:
        docs_test_df: DataFrame with an ``image`` column (objects convertible
            with ``np.array``) and a ``label`` column.

    Returns:
        list: Five ``[label, filter, prediction, cv2 time, prediction time]``
        rows per image, in the order ``original``, ``image_full_gray``,
        ``limiarização_de_otsu``, ``limiarização_binária``, ``bilateral``.
        ``cv2 time`` is ``None`` for the unfiltered image.
    """
    dict_values = []

    for index, row in docs_test_df.iterrows():
        # Read from the iterated row of the DataFrame that was passed in. The
        # previous version looked the row up in the global svhn_df with
        # .iloc[index], which ignored the argument and only worked while the
        # index labels happened to equal the row positions.
        label = row['label']
        print(index, label)
        image = np.array(row['image'])
        # NOTE(review): get_image_gray converts with COLOR_BGR2GRAY; if these
        # images are PIL images the array is presumably RGB-ordered - confirm
        # the intended channel order.

        roi_image_gray, tempo_gray = get_image_gray(image)
        roi_image_trans, tempo_trans = get_image_image_trans(roi_image_gray)
        roi_image_binary, tempo_binary = get_image_binary(roi_image_gray)
        roi_image_bilateral, tempo_bilateral = get_image_trans_bilateral(roi_image_gray)

        # (filter name, image to OCR, preprocessing time). The binary label
        # has no trailing space so it matches the label used by the EasyOCR
        # flows and the two tables can be grouped by filter.
        variants = [
            ('original', image, None),
            ('image_full_gray', roi_image_gray, tempo_gray),
            ('limiarização_de_otsu', roi_image_trans, tempo_trans),
            ('limiarização_binária', roi_image_binary, tempo_binary),
            ('bilateral', roi_image_bilateral, tempo_bilateral),
        ]

        for filter_name, variant, cv2_time in variants:
            text, tempo_decorrido_pytesseract = get_pytesseract_image_to_string(variant)
            # Collapse the OCR output onto a single line.
            clean_text = text.strip().replace("\n", " ").replace("\r", "")
            dict_values.append([label, filter_name, clean_text, cv2_time, tempo_decorrido_pytesseract])

    return dict_values

def get_cv2_easyocr_extraction(docs_test_df):
    """Run EasyOCR on every image of ``docs_test_df``, raw and after each OpenCV filter.

    Args:
        docs_test_df: DataFrame with an ``image`` column (objects convertible
            with ``np.array``) and a ``label`` column.

    Returns:
        list: Five ``[label, filter, prediction, cv2 time, prediction time]``
        rows per image, in the order ``original``, ``image_full_gray``,
        ``limiarização_de_otsu``, ``limiarização_binária``, ``bilateral``.
        ``prediction`` is the first text returned by EasyOCR, or ``''`` when
        nothing was returned; ``cv2 time`` is ``None`` for the unfiltered image.
    """
    reader = easyocr.Reader(['en'])

    dict_values = []
    for _, row in docs_test_df.iterrows():
        # Read from the iterated row of the DataFrame that was passed in. The
        # previous version looked the row up in the global svhn_df with
        # .iloc[index], which ignored the argument and only worked while the
        # index labels happened to equal the row positions.
        label = row['label']
        image = np.array(row['image'])

        roi_image_gray, tempo_gray = get_image_gray(image)
        roi_image_trans, tempo_trans = get_image_image_trans(roi_image_gray)
        roi_image_binary, tempo_binary = get_image_binary(roi_image_gray)
        roi_image_bilateral, tempo_bilateral = get_image_trans_bilateral(roi_image_gray)

        # (filter name, image to OCR, preprocessing time)
        variants = [
            ('original', image, None),
            ('image_full_gray', roi_image_gray, tempo_gray),
            ('limiarização_de_otsu', roi_image_trans, tempo_trans),
            ('limiarização_binária', roi_image_binary, tempo_binary),
            ('bilateral', roi_image_bilateral, tempo_bilateral),
        ]

        for filter_name, variant, cv2_time in variants:
            prediction, tempo_decorrido_easyocr = get_easyocr_readtext(reader, variant)
            # Only the first returned text is kept; an empty result becomes ''.
            prediction = prediction[0] if len(prediction) > 0 else ''
            dict_values.append([label, filter_name, prediction, cv2_time, tempo_decorrido_easyocr])

    return dict_values


def get_esrgan_pytesseract_extraction(docs_test_df):
    """Run Tesseract on images read from disk, raw and after each OpenCV filter.

    For every row the image path is the fixed folder plus ``row['filename']``,
    with every ``"jpg"`` substring of that path replaced by ``"png"``. The path
    is printed and the image is loaded with ``cv2.imread``.

    Args:
        docs_test_df: DataFrame with ``filename`` and ``path`` columns.

    Returns:
        list: Five six-element rows per image:
        ``[path, filename, filter, text, cv2 time, OCR time]``.
    """
    results = []
    base_dir = '/content/crop_craft-images/improv_test_images/'

    for _, record in docs_test_df.iterrows():
        image_path = (base_dir + record['filename']).replace("jpg", "png")
        print(image_path)

        image = cv2.imread(image_path)

        # The unfiltered image goes first and carries no preprocessing time.
        raw_text, ocr_seconds = get_pytesseract_image_to_string(image)
        flat_text = raw_text.strip().replace("\n", " ").replace("\r", "")
        results.append([record['path'], record['filename'], 'original', flat_text, None, ocr_seconds])

        # Every filter after the grayscale conversion works on the gray image.
        gray, gray_seconds = get_image_gray(image)
        otsu, otsu_seconds = get_image_image_trans(gray)
        binary, binary_seconds = get_image_binary(gray)
        bilateral, bilateral_seconds = get_image_trans_bilateral(gray)

        filtered_variants = (
            ('image_full_gray', gray, gray_seconds),
            ('limiarização_de_otsu', otsu, otsu_seconds),
            ('limiarização_binária ', binary, binary_seconds),
            ('bilateral', bilateral, bilateral_seconds),
        )
        for filter_name, filtered_image, filter_seconds in filtered_variants:
            raw_text, ocr_seconds = get_pytesseract_image_to_string(filtered_image)
            flat_text = raw_text.strip().replace("\n", " ").replace("\r", "")
            results.append(
                [record['path'], record['filename'], filter_name, flat_text, filter_seconds, ocr_seconds]
            )

    return results


def get_esrgan_easyocr_extraction(docs_test_df):
    """Run EasyOCR (``['pt']`` reader) on images read from disk, raw and filtered.

    For every row the image path is the fixed folder plus ``row['filename']``,
    with every ``"jpg"`` substring of that path replaced by ``"png"``. The path
    is printed and the image is loaded with ``cv2.imread``.

    Args:
        docs_test_df: DataFrame with ``filename`` and ``path`` columns.

    Returns:
        list: Five six-element rows per image:
        ``[path, filename, filter, prediction, cv2 time, OCR time]`` where
        ``prediction`` is the first text returned by EasyOCR or ``''``.
    """
    ocr_reader = easyocr.Reader(['pt'])

    results = []
    base_dir = '/content/crop_craft-images/improv_test_images/'

    for _, record in docs_test_df.iterrows():
        image_path = (base_dir + record['filename']).replace("jpg", "png")
        print(image_path)

        image = cv2.imread(image_path)

        # The unfiltered image goes first and carries no preprocessing time.
        texts, ocr_seconds = get_easyocr_readtext(ocr_reader, image)
        first_text = texts[0] if len(texts) > 0 else ''
        results.append([record['path'], record['filename'], 'original', first_text, None, ocr_seconds])

        # Every filter after the grayscale conversion works on the gray image.
        gray, gray_seconds = get_image_gray(image)
        otsu, otsu_seconds = get_image_image_trans(gray)
        binary, binary_seconds = get_image_binary(gray)
        bilateral, bilateral_seconds = get_image_trans_bilateral(gray)

        filtered_variants = (
            ('image_full_gray', gray, gray_seconds),
            ('limiarização_de_otsu', otsu, otsu_seconds),
            ('limiarização_binária', binary, binary_seconds),
            ('bilateral', bilateral, bilateral_seconds),
        )
        for filter_name, filtered_image, filter_seconds in filtered_variants:
            texts, ocr_seconds = get_easyocr_readtext(ocr_reader, filtered_image)
            first_text = texts[0] if len(texts) > 0 else ''
            results.append(
                [record['path'], record['filename'], filter_name, first_text, filter_seconds, ocr_seconds]
            )

    return results

def process_result(dict_values):
    """Turn the list of extraction rows into a labelled DataFrame.

    Args:
        dict_values: List of five-element rows
            ``[label, filter, prediction, cv2 time, prediction time]``.

    Returns:
        pandas.DataFrame: One row per entry with the columns ``label``,
        ``filter``, ``prediction``, ``cv2 time`` and ``prediction time``.
        An empty list yields an empty frame that still has these columns.
    """
    # Naming the columns in the constructor also covers the empty case, where
    # assigning .columns on a zero-column frame raises a length mismatch.
    columns = ['label', 'filter', 'prediction', 'cv2 time', 'prediction time']
    return pd.DataFrame(dict_values, columns=columns)


def get_metrics(data_df):
    """Add the jiwer error metrics for every row of ``data_df``.

    For each row, ``label`` is used as the reference and the joined
    ``prediction`` as the hypothesis. The values are written into the columns
    ``wer``, ``cer``, ``wil``, ``wip`` and ``mer`` of the frame that was passed
    in, which is also the return value.

    Args:
        data_df: DataFrame with ``label`` and ``prediction`` columns.

    Returns:
        pandas.DataFrame: The same ``data_df`` object with the metric columns set.
    """
    # Column name paired with the function that fills it, in column order.
    scorers = (('wer', wer), ('cer', cer), ('wil', wil), ('wip', wip), ('mer', mer))

    for row_key, record in data_df.iterrows():
        reference = record['label']
        for column_name, scorer in scorers:
            data_df.at[row_key, column_name] = scorer(reference, ''.join(record['prediction']))

    return data_df

### OpenCV[cinza] -> Tesseract | fluxo 4: CRAFT -> OpenCV[todos preproc] -> Tesseract

In [None]:
# Number of rows (test images) that the extraction functions will process.
len(svhn_df)

13068

In [None]:
# Run Tesseract on every test image (raw and after each OpenCV filter).
# The function prints one "index label" line per image.
dict_values = get_cv2_pytesseract_extraction(svhn_df)

[1;30;43mA saída de streaming foi truncada nas últimas 5000 linhas.[0m
8068 209
8069 15
8070 286
8071 17
8072 6
8073 6
8074 49
8075 12
8076 23
8077 32
8078 19
8079 21
8080 126
8081 27
8082 14
8083 35
8084 2
8085 43
8086 24
8087 69
8088 78
8089 26
8090 73
8091 20
8092 12
8093 110
8094 58
8095 158
8096 19
8097 2
8098 4
8099 4
8100 123
8101 13
8102 29
8103 72
8104 6
8105 12
8106 12
8107 1
8108 18
8109 205
8110 79
8111 16
8112 76
8113 24
8114 1
8115 16
8116 34
8117 11
8118 69
8119 50
8120 425
8121 2
8122 24
8123 19
8124 27
8125 27
8126 128
8127 24
8128 29
8129 27
8130 24
8131 23
8132 38
8133 158
8134 37
8135 27
8136 1531
8137 52
8138 199
8139 287
8140 125
8141 2
8142 18
8143 255
8144 10
8145 18
8146 155
8147 51
8148 190
8149 147
8150 205
8151 47
8152 51
8153 255
8154 33
8155 1
8156 5
8157 12
8158 125
8159 51
8160 45
8161 2
8162 16
8163 19
8164 17
8165 31
8166 23
8167 63
8168 21
8169 93
8170 44
8171 48
8172 23
8173 16
8174 26
8175 14
8176 54
8177 208
8178 59
8179 27
8180 100
8181 30
8182 

In [None]:
# Tabulate the Tesseract result rows.
data_df = process_result(dict_values)

In [None]:
# Add the jiwer metric columns (wer, cer, wil, wip, mer) to every row.
data_df = get_metrics(data_df)

In [None]:
# Display the scored Tesseract results.
data_df

Unnamed: 0,label,filter,prediction,cv2 time,prediction time,wer,cer,wil,wip,mer,fluxo
0,31,original,,,0.105295,1.0,1.0,1.0,0.0,1.0,OpenCV[todos preproc] -> Tesseract
1,31,image_full_gray,,0.000036,0.103361,1.0,1.0,1.0,0.0,1.0,OpenCV[todos preproc] -> Tesseract
2,31,limiarização_de_otsu,,0.000037,0.104338,1.0,1.0,1.0,0.0,1.0,OpenCV[todos preproc] -> Tesseract
3,31,limiarização_binária,,0.000004,0.102804,1.0,1.0,1.0,0.0,1.0,OpenCV[todos preproc] -> Tesseract
4,31,bilateral,,0.013015,0.103670,1.0,1.0,1.0,0.0,1.0,OpenCV[todos preproc] -> Tesseract
...,...,...,...,...,...,...,...,...,...,...,...
65335,183,original,po,,0.110692,1.0,1.0,1.0,0.0,1.0,OpenCV[todos preproc] -> Tesseract
65336,183,image_full_gray,2,0.000040,0.109671,1.0,1.0,1.0,0.0,1.0,OpenCV[todos preproc] -> Tesseract
65337,183,limiarização_de_otsu,vs,0.000024,0.108179,1.0,1.0,1.0,0.0,1.0,OpenCV[todos preproc] -> Tesseract
65338,183,limiarização_binária,wo,0.000005,0.112807,1.0,1.0,1.0,0.0,1.0,OpenCV[todos preproc] -> Tesseract


In [None]:
# Write the scored Tesseract results to a CSV file, without the row index.
data_df.to_csv('fluxos_3-4_svhn.csv', index=False)

In [None]:
# Copy the CSV into the results folder on the mounted Google Drive.
%cp -r /content/fluxos_3-4_svhn.csv /content/drive/MyDrive/'Colab Notebooks'/OCR/Pos/results/


In [None]:
# Keep the Tesseract results under their own name and tag them with the flow.
# .copy() is required: a plain assignment only aliases data_df, so adding
# 'fluxo' would also modify data_df. The other flows already take a copy.
approach_3_4_data_df = data_df.copy()
approach_3_4_data_df['fluxo'] = 'OpenCV[todos preproc] -> Tesseract'

approach_3_4_data_df.head()

Unnamed: 0,label,filter,prediction,cv2 time,prediction time,wer,cer,wil,wip,mer,fluxo
0,31,original,,,0.105295,1.0,1.0,1.0,0.0,1.0,OpenCV[todos preproc] -> Tesseract
1,31,image_full_gray,,3.6e-05,0.103361,1.0,1.0,1.0,0.0,1.0,OpenCV[todos preproc] -> Tesseract
2,31,limiarização_de_otsu,,3.7e-05,0.104338,1.0,1.0,1.0,0.0,1.0,OpenCV[todos preproc] -> Tesseract
3,31,limiarização_binária,,4e-06,0.102804,1.0,1.0,1.0,0.0,1.0,OpenCV[todos preproc] -> Tesseract
4,31,bilateral,,0.013015,0.10367,1.0,1.0,1.0,0.0,1.0,OpenCV[todos preproc] -> Tesseract


### OpenCV[cinza] -> EASYOCR | fluxo 2: CRAFT -> OpenCV[todos preproc] -> EASYOCR

In [None]:
# Run EasyOCR on every test image (raw and after each OpenCV filter).
dict_values = get_cv2_easyocr_extraction(svhn_df)

[1;30;43mA saída de streaming foi truncada nas últimas 5000 linhas.[0m
8068 209
8069 15
8070 286
8071 17
8072 6
8073 6
8074 49
8075 12
8076 23
8077 32
8078 19
8079 21
8080 126
8081 27
8082 14
8083 35
8084 2
8085 43
8086 24
8087 69
8088 78
8089 26
8090 73
8091 20
8092 12
8093 110
8094 58
8095 158
8096 19
8097 2
8098 4
8099 4
8100 123
8101 13
8102 29
8103 72
8104 6
8105 12
8106 12
8107 1
8108 18
8109 205
8110 79
8111 16
8112 76
8113 24
8114 1
8115 16
8116 34
8117 11
8118 69
8119 50
8120 425
8121 2
8122 24
8123 19
8124 27
8125 27
8126 128
8127 24
8128 29
8129 27
8130 24
8131 23
8132 38
8133 158
8134 37
8135 27
8136 1531
8137 52
8138 199
8139 287
8140 125
8141 2
8142 18
8143 255
8144 10
8145 18
8146 155
8147 51
8148 190
8149 147
8150 205
8151 47
8152 51
8153 255
8154 33
8155 1
8156 5
8157 12
8158 125
8159 51
8160 45
8161 2
8162 16
8163 19
8164 17
8165 31
8166 23
8167 63
8168 21
8169 93
8170 44
8171 48
8172 23
8173 16
8174 26
8175 14
8176 54
8177 208
8178 59
8179 27
8180 100
8181 30
8182 

In [None]:
# Tabulate the EasyOCR result rows (this reuses the data_df name).
data_df = process_result(dict_values)

In [None]:
# Add the jiwer metric columns (wer, cer, wil, wip, mer) to every row.
data_df = get_metrics(data_df)

In [None]:
# Keep an independent copy of the EasyOCR results and tag it with the flow name.
approach_1_2_data_df = data_df.copy()
approach_1_2_data_df['fluxo'] = 'OpenCV[todos preproc] -> EASYOCR'

approach_1_2_data_df.head()

Unnamed: 0,label,filter,prediction,cv2 time,prediction time,wer,cer,wil,wip,mer,fluxo
0,31,original,,,0.00889,1.0,1.0,1.0,0.0,1.0,OpenCV[todos preproc] -> EASYOCR
1,31,image_full_gray,,1.4e-05,0.005503,1.0,1.0,1.0,0.0,1.0,OpenCV[todos preproc] -> EASYOCR
2,31,limiarização_de_otsu,,1.2e-05,0.005413,1.0,1.0,1.0,0.0,1.0,OpenCV[todos preproc] -> EASYOCR
3,31,limiarização_binária,,3e-06,0.005227,1.0,1.0,1.0,0.0,1.0,OpenCV[todos preproc] -> EASYOCR
4,31,bilateral,,0.000283,0.005213,1.0,1.0,1.0,0.0,1.0,OpenCV[todos preproc] -> EASYOCR


In [None]:
# Stack the EasyOCR and Tesseract result tables into a single frame.
# NOTE(review): ignore_index is not passed, so each table keeps its own index
# and the index labels repeat across the two flows - confirm that is intended.
result = pd.concat([approach_1_2_data_df, approach_3_4_data_df])
result

Unnamed: 0,label,filter,prediction,cv2 time,prediction time,wer,cer,wil,wip,mer,fluxo
0,31,original,,,0.008890,1.0,1.0,1.0,0.0,1.0,OpenCV[todos preproc] -> EASYOCR
1,31,image_full_gray,,0.000014,0.005503,1.0,1.0,1.0,0.0,1.0,OpenCV[todos preproc] -> EASYOCR
2,31,limiarização_de_otsu,,0.000012,0.005413,1.0,1.0,1.0,0.0,1.0,OpenCV[todos preproc] -> EASYOCR
3,31,limiarização_binária,,0.000003,0.005227,1.0,1.0,1.0,0.0,1.0,OpenCV[todos preproc] -> EASYOCR
4,31,bilateral,,0.000283,0.005213,1.0,1.0,1.0,0.0,1.0,OpenCV[todos preproc] -> EASYOCR
...,...,...,...,...,...,...,...,...,...,...,...
65335,183,original,po,,0.110692,1.0,1.0,1.0,0.0,1.0,OpenCV[todos preproc] -> Tesseract
65336,183,image_full_gray,2,0.000040,0.109671,1.0,1.0,1.0,0.0,1.0,OpenCV[todos preproc] -> Tesseract
65337,183,limiarização_de_otsu,vs,0.000024,0.108179,1.0,1.0,1.0,0.0,1.0,OpenCV[todos preproc] -> Tesseract
65338,183,limiarização_binária,wo,0.000005,0.112807,1.0,1.0,1.0,0.0,1.0,OpenCV[todos preproc] -> Tesseract


## ESRGAN Model

In [None]:
import os
import shutil
from textbsr import textbsr

Creating the folders that will hold the exported test images and their enhanced versions

In [None]:
! mkdir /content/improv_test_images
! mkdir /content/test_images

In [None]:
# Save every test image as a PNG named after its label.
# FIXME: the file name contains only the label, so rows that share a label
# (the output below lists 6.png, 12.png and 19.png more than once) overwrite
# each other and only the last image per label stays on disk. The ESRGAN
# extraction functions rebuild the path from the label as well, so the naming
# scheme has to be changed in both places together.
for idx, row in svhn_df.iterrows():
    # Use the iterated row directly; svhn_df.iloc[...] with the index label is
    # only correct while labels equal positions. `idx` also avoids shadowing
    # the builtin id().
    name = '/content/test_images/' + row['label'] + '.png'
    print(name)

    row['image'].save(name)

[1;30;43mA saída de streaming foi truncada nas últimas 5000 linhas.[0m
/content/test_images/209.png
/content/test_images/15.png
/content/test_images/286.png
/content/test_images/17.png
/content/test_images/6.png
/content/test_images/6.png
/content/test_images/49.png
/content/test_images/12.png
/content/test_images/23.png
/content/test_images/32.png
/content/test_images/19.png
/content/test_images/21.png
/content/test_images/126.png
/content/test_images/27.png
/content/test_images/14.png
/content/test_images/35.png
/content/test_images/2.png
/content/test_images/43.png
/content/test_images/24.png
/content/test_images/69.png
/content/test_images/78.png
/content/test_images/26.png
/content/test_images/73.png
/content/test_images/20.png
/content/test_images/12.png
/content/test_images/110.png
/content/test_images/58.png
/content/test_images/158.png
/content/test_images/19.png
/content/test_images/2.png
/content/test_images/4.png
/content/test_images/4.png
/content/test_images/123.png
/co

In [None]:
# Run textbsr over every image in /content/test_images/ and write the restored
# images to /content/improv_test_images.
# NOTE(review): diretorio_destino is not used anywhere in the visible notebook.
diretorio_destino = '/content/test_images'
textbsr.bsr(input_path='/content/test_images/', output_path='/content/improv_test_images', aligned=True)

               Model Name : BSRGAN
                   GPU ID : 0
               Input Path : /content/test_images/
              Output Path : /content/improv_test_images
       Background SR Path : None
            Image Details : Aligned Text Layout. No text detection is used.
 Save LR & SR text layout : False
          Restoring  0001 : x4 --> 0.png
          Restoring  0002 : x4 --> 00.png
          Restoring  0003 : x4 --> 003.png
          Restoring  0004 : x4 --> 004.png
          Restoring  0005 : x4 --> 01287.png
          Restoring  0006 : x4 --> 04.png
          Restoring  0007 : x4 --> 05.png
          Restoring  0008 : x4 --> 09.png
          Restoring  0009 : x4 --> 1.png
          Restoring  0010 : x4 --> 10.png
          Restoring  0011 : x4 --> 100.png
          Restoring  0012 : x4 --> 1000.png
          Restoring  0013 : x4 --> 1001.png
          Restoring  0014 : x4 --> 1004.png
          Restoring  0015 : x4 --> 101.png
          Restoring  0016 : x4 --> 1016.png
 

In [None]:
# Strip the '_BSRGANText' marker from the names of the enhanced images so that
# each file name matches the name of the image it was generated from.
pasta = "/content/improv_test_images/"

# Regular files only, in sorted order.
arquivos = [f for f in os.listdir(pasta) if os.path.isfile(os.path.join(pasta, f)) ]
arquivos.sort()

# Target names, position-aligned with `arquivos`.
nova_lista = [nome.replace('_BSRGANText', '') for nome in arquivos]
pasta = '/content/improv_test_images'

for arquivo_original, novo_nome in zip(arquivos, nova_lista):
    caminho_original = os.path.join(pasta, arquivo_original)
    caminho_novo = os.path.join(pasta, novo_nome)

    # Rename the file
    os.rename(caminho_original, caminho_novo)

print("Arquivos renomeados com sucesso!")

### 'ESRGAN -> OpenCV -> Tesseract'

In [None]:
def get_esrgan_pytesseract_extraction(docs_test_df):
    """Run Tesseract on the enhanced image of every row, raw and after each OpenCV filter.

    The image of a row is read with ``cv2.imread`` from
    ``/content/improv_test_images/<label>.png`` and the path is printed.

    Args:
        docs_test_df: DataFrame with a ``label`` column (strings).

    Returns:
        list: Five ``[label, filter, prediction, cv2 time, prediction time]``
        rows per image, in the order ``original``, ``image_full_gray``,
        ``limiarização_de_otsu``, ``limiarização_binária``, ``bilateral``.
        ``cv2 time`` is ``None`` for the unfiltered image.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot read the image file.
    """
    dict_values = []
    path_folder = '/content/improv_test_images/'

    for index, row in docs_test_df.iterrows():
        # Read the label from the iterated row of the argument instead of
        # looking it up in the global svhn_df with .iloc[index].
        label = row['label']
        # NOTE(review): the path depends on the label only, so rows sharing a
        # label all read the same file - confirm this is acceptable.
        path = path_folder + label + '.png'
        print(index, path)
        image = cv2.imread(path)
        if image is None:
            # imread signals an unreadable file by returning None; fail here
            # with the path instead of with an obscure error inside the OCR call.
            raise FileNotFoundError(f"could not read enhanced image: {path}")

        roi_image_gray, tempo_gray = get_image_gray(image)
        roi_image_trans, tempo_trans = get_image_image_trans(roi_image_gray)
        roi_image_binary, tempo_binary = get_image_binary(roi_image_gray)
        roi_image_bilateral, tempo_bilateral = get_image_trans_bilateral(roi_image_gray)

        # (filter name, image to OCR, preprocessing time). The Otsu entry now
        # OCRs the Otsu image; it previously re-ran OCR on the grayscale image
        # while reporting the Otsu label and time. The binary label has no
        # trailing space so it matches the EasyOCR flows.
        variants = [
            ('original', image, None),
            ('image_full_gray', roi_image_gray, tempo_gray),
            ('limiarização_de_otsu', roi_image_trans, tempo_trans),
            ('limiarização_binária', roi_image_binary, tempo_binary),
            ('bilateral', roi_image_bilateral, tempo_bilateral),
        ]

        for filter_name, variant, cv2_time in variants:
            text, tempo_decorrido_pytesseract = get_pytesseract_image_to_string(variant)
            # Collapse the OCR output onto a single line.
            clean_text = text.strip().replace("\n", " ").replace("\r", "")
            dict_values.append([label, filter_name, clean_text, cv2_time, tempo_decorrido_pytesseract])

    return dict_values

In [None]:
# Run Tesseract on the enhanced images (raw and after each OpenCV filter).
dict_values = get_esrgan_pytesseract_extraction(svhn_df)

[1;30;43mA saída de streaming foi truncada nas últimas 5000 linhas.[0m
8068 /content/improv_test_images/209.png
8069 /content/improv_test_images/15.png
8070 /content/improv_test_images/286.png
8071 /content/improv_test_images/17.png
8072 /content/improv_test_images/6.png
8073 /content/improv_test_images/6.png
8074 /content/improv_test_images/49.png
8075 /content/improv_test_images/12.png
8076 /content/improv_test_images/23.png
8077 /content/improv_test_images/32.png
8078 /content/improv_test_images/19.png
8079 /content/improv_test_images/21.png
8080 /content/improv_test_images/126.png
8081 /content/improv_test_images/27.png
8082 /content/improv_test_images/14.png
8083 /content/improv_test_images/35.png
8084 /content/improv_test_images/2.png
8085 /content/improv_test_images/43.png
8086 /content/improv_test_images/24.png
8087 /content/improv_test_images/69.png
8088 /content/improv_test_images/78.png
8089 /content/improv_test_images/26.png
8090 /content/improv_test_images/73.png
8091 /c

In [None]:
# Tabulate the ESRGAN + Tesseract result rows (this reuses the data_df name).
data_df = process_result(dict_values)

In [None]:
# Add the jiwer metric columns (wer, cer, wil, wip, mer) to every row.
data_df = get_metrics(data_df)

In [None]:
# Keep an independent copy of the ESRGAN + Tesseract results and tag it with the flow name.
approach_7_8_data_df = data_df.copy()
approach_7_8_data_df['fluxo'] =  'ESRGAN -> OpenCV -> Tesseract'

approach_7_8_data_df.head()

Unnamed: 0,label,filter,prediction,cv2 time,prediction time,wer,cer,wil,wip,mer,fluxo
0,31,original,,,0.166094,1.0,1.0,1.0,0.0,1.0,ESRGAN -> OpenCV -> Tesseract
1,31,image_full_gray,,0.000242,0.129879,1.0,1.0,1.0,0.0,1.0,ESRGAN -> OpenCV -> Tesseract
2,31,limiarização_de_otsu,,0.000262,0.127403,1.0,1.0,1.0,0.0,1.0,ESRGAN -> OpenCV -> Tesseract
3,31,limiarização_binária,,2.3e-05,0.111676,1.0,1.0,1.0,0.0,1.0,ESRGAN -> OpenCV -> Tesseract
4,31,bilateral,,0.013161,0.126307,1.0,1.0,1.0,0.0,1.0,ESRGAN -> OpenCV -> Tesseract


### 'ESRGAN -> OpenCV -> EASYOCR'

In [None]:
def get_esrgan_easyocr_extraction(docs_test_df):
    """Run EasyOCR (``['en']`` reader) on the enhanced image of every row, raw and filtered.

    The image of a row is read with ``cv2.imread`` from
    ``/content/improv_test_images/<label>.png`` and the path is printed.

    Args:
        docs_test_df: DataFrame with a ``label`` column (strings).

    Returns:
        list: Five ``[label, filter, prediction, cv2 time, prediction time]``
        rows per image, in the order ``original``, ``image_full_gray``,
        ``limiarização_de_otsu``, ``limiarização_binária``, ``bilateral``.
        ``prediction`` is the first text returned by EasyOCR, or ``''`` when
        nothing was returned; ``cv2 time`` is ``None`` for the unfiltered image.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot read the image file.
    """
    reader = easyocr.Reader(['en'])

    dict_values = []
    path_folder = '/content/improv_test_images/'

    for index, row in docs_test_df.iterrows():
        # Read the label from the iterated row of the argument instead of
        # looking it up in the global svhn_df with .iloc[index].
        label = row['label']
        # NOTE(review): the path depends on the label only, so rows sharing a
        # label all read the same file - confirm this is acceptable.
        path = path_folder + label + '.png'
        print(index, path)
        image = cv2.imread(path)
        if image is None:
            # imread signals an unreadable file by returning None; fail here
            # with the path instead of with an obscure error inside the OCR call.
            raise FileNotFoundError(f"could not read enhanced image: {path}")

        # Grayscale first; the remaining filters all work on the gray image.
        roi_image_gray, tempo_gray = get_image_gray(image)
        roi_image_trans, tempo_trans = get_image_image_trans(roi_image_gray)
        roi_image_binary, tempo_binary = get_image_binary(roi_image_gray)
        roi_image_bilateral, tempo_bilateral = get_image_trans_bilateral(roi_image_gray)

        # (filter name, image to OCR, preprocessing time)
        variants = [
            ('original', image, None),
            ('image_full_gray', roi_image_gray, tempo_gray),
            ('limiarização_de_otsu', roi_image_trans, tempo_trans),
            ('limiarização_binária', roi_image_binary, tempo_binary),
            ('bilateral', roi_image_bilateral, tempo_bilateral),
        ]

        for filter_name, variant, cv2_time in variants:
            # Every variant is recognised exactly once, through the timed
            # helper. The bilateral image used to be OCR'd a second time with
            # an untimed reader.readtext call whose result replaced the timed one.
            prediction, tempo_decorrido_easyocr = get_easyocr_readtext(reader, variant)
            prediction = prediction[0] if len(prediction) > 0 else ''
            dict_values.append([label, filter_name, prediction, cv2_time, tempo_decorrido_easyocr])

    return dict_values


In [None]:
# Run EasyOCR on the enhanced images (raw and after each OpenCV filter).
dict_values = get_esrgan_easyocr_extraction(svhn_df)

[1;30;43mA saída de streaming foi truncada nas últimas 5000 linhas.[0m
8068 /content/improv_test_images/209.png
8069 /content/improv_test_images/15.png
8070 /content/improv_test_images/286.png
8071 /content/improv_test_images/17.png
8072 /content/improv_test_images/6.png
8073 /content/improv_test_images/6.png
8074 /content/improv_test_images/49.png
8075 /content/improv_test_images/12.png
8076 /content/improv_test_images/23.png
8077 /content/improv_test_images/32.png
8078 /content/improv_test_images/19.png
8079 /content/improv_test_images/21.png
8080 /content/improv_test_images/126.png
8081 /content/improv_test_images/27.png
8082 /content/improv_test_images/14.png
8083 /content/improv_test_images/35.png
8084 /content/improv_test_images/2.png
8085 /content/improv_test_images/43.png
8086 /content/improv_test_images/24.png
8087 /content/improv_test_images/69.png
8088 /content/improv_test_images/78.png
8089 /content/improv_test_images/26.png
8090 /content/improv_test_images/73.png
8091 /c

In [None]:
# Tabulate the ESRGAN + EasyOCR result rows (this reuses the data_df name).
data_df = process_result(dict_values)

In [None]:
# Add the jiwer metric columns (wer, cer, wil, wip, mer) to every row.
data_df = get_metrics(data_df)

In [None]:
# Keep an independent copy of the ESRGAN + EasyOCR results and tag it with the flow name.
approach_5_6_data_df = data_df.copy()
approach_5_6_data_df['fluxo'] = 'ESRGAN -> OpenCV -> EASYOCR'

approach_5_6_data_df.head(20)

Unnamed: 0,label,filter,prediction,cv2 time,prediction time,wer,cer,wil,wip,mer,fluxo
0,31,original,31,,0.021454,0.0,0.0,0.0,1.0,0.0,ESRGAN -> OpenCV -> EASYOCR
1,31,image_full_gray,31,0.0002,0.019185,0.0,0.0,0.0,1.0,0.0,ESRGAN -> OpenCV -> EASYOCR
2,31,limiarização_de_otsu,a,0.00021,0.019365,1.0,1.0,1.0,0.0,1.0,ESRGAN -> OpenCV -> EASYOCR
3,31,limiarização_binária,3,2.1e-05,0.019653,1.0,0.5,1.0,0.0,1.0,ESRGAN -> OpenCV -> EASYOCR
4,31,bilateral,31,0.012908,0.019528,0.0,0.0,0.0,1.0,0.0,ESRGAN -> OpenCV -> EASYOCR
5,3,original,,,0.011679,1.0,1.0,1.0,0.0,1.0,ESRGAN -> OpenCV -> EASYOCR
6,3,image_full_gray,,6.7e-05,0.011551,1.0,1.0,1.0,0.0,1.0,ESRGAN -> OpenCV -> EASYOCR
7,3,limiarização_de_otsu,I[,0.000186,0.0194,1.0,2.0,1.0,0.0,1.0,ESRGAN -> OpenCV -> EASYOCR
8,3,limiarização_binária,1'[,1.7e-05,0.024688,1.0,3.0,1.0,0.0,1.0,ESRGAN -> OpenCV -> EASYOCR
9,3,bilateral,M,0.011423,0.018001,1.0,1.0,1.0,0.0,1.0,ESRGAN -> OpenCV -> EASYOCR


In [None]:
# Stack the two ESRGAN result tables (EasyOCR and Tesseract) into one frame.
# NOTE(review): this rebinds `result`, replacing the earlier OpenCV-only
# concatenation, and ignore_index is not passed, so index labels repeat across
# the two flows - confirm both are intended.
result = pd.concat([approach_5_6_data_df, approach_7_8_data_df])
result

Unnamed: 0,label,filter,prediction,cv2 time,prediction time,wer,cer,wil,wip,mer,fluxo
0,31,original,31,,0.021454,0.0,0.0,0.0,1.0,0.0,ESRGAN -> OpenCV -> EASYOCR
1,31,image_full_gray,31,0.000200,0.019185,0.0,0.0,0.0,1.0,0.0,ESRGAN -> OpenCV -> EASYOCR
2,31,limiarização_de_otsu,a,0.000210,0.019365,1.0,1.0,1.0,0.0,1.0,ESRGAN -> OpenCV -> EASYOCR
3,31,limiarização_binária,3,0.000021,0.019653,1.0,0.5,1.0,0.0,1.0,ESRGAN -> OpenCV -> EASYOCR
4,31,bilateral,31,0.012908,0.019528,0.0,0.0,0.0,1.0,0.0,ESRGAN -> OpenCV -> EASYOCR
...,...,...,...,...,...,...,...,...,...,...,...
65335,183,original,\v,,0.147386,1.0,1.0,1.0,0.0,1.0,ESRGAN -> OpenCV -> Tesseract
65336,183,image_full_gray,vo,0.000178,0.121544,1.0,1.0,1.0,0.0,1.0,ESRGAN -> OpenCV -> Tesseract
65337,183,limiarização_de_otsu,vo,0.000185,0.120769,1.0,1.0,1.0,0.0,1.0,ESRGAN -> OpenCV -> Tesseract
65338,183,limiarização_binária,b2,0.000026,0.113375,1.0,1.0,1.0,0.0,1.0,ESRGAN -> OpenCV -> Tesseract
