# Validation of Results


In [None]:
!pip install jiwer

Collecting jiwer
  Downloading jiwer-3.0.4-py3-none-any.whl (21 kB)
Collecting rapidfuzz<4,>=3 (from jiwer)
  Downloading rapidfuzz-3.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.4/3.4 MB[0m [31m14.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rapidfuzz, jiwer
Successfully installed jiwer-3.0.4 rapidfuzz-3.9.3


In [None]:
import pandas as pd
from jiwer import wer, cer, wil, wip, mer


# Get Datasets

In [None]:
def get_bid_dataset():
    """Load and merge the BID identity-card result CSVs.

    Reads ``fluxos_1-4_ID_CARD.csv`` and ``fluxos_5-8_ID_CARD.csv`` from the
    current working directory, stacks them with a fresh 0..n-1 index, removes
    every row whose ``path`` equals ``/content/crop_craft-images/photos/``,
    replaces missing ``cv2 time`` / ``prediction time`` values with 0 and adds
    a ``time_total`` column holding their sum.

    Returns:
        pandas.DataFrame: an independent frame with the filtered rows. The
        index keeps the labels assigned by the concatenation; it is not
        renumbered after the path filter.
    """
    fluxo_1_4 = pd.read_csv('fluxos_1-4_ID_CARD.csv')
    fluxo_5_8 = pd.read_csv('fluxos_5-8_ID_CARD.csv')

    result_BID = pd.concat([fluxo_1_4, fluxo_5_8], ignore_index=True)

    # Copy the filtered selection so the column writes below target an
    # independent frame instead of a slice of ``result_BID``.
    keep_rows = result_BID.path != '/content/crop_craft-images/photos/'
    result_filtered_BID = result_BID[keep_rows].copy()

    # A missing timing is treated as "took no time".
    for timing_column in ('cv2 time', 'prediction time'):
        result_filtered_BID[timing_column] = result_filtered_BID[timing_column].fillna(0)

    result_filtered_BID.loc[:, 'time_total'] = (
        result_filtered_BID['cv2 time'] + result_filtered_BID['prediction time']
    )

    return result_filtered_BID

def get_IIIT5K_dataset():
    """Load and merge the IIIT5K result CSVs.

    Reads ``fluxos_1-4_IIIT5K.csv`` and ``fluxos_5-8_IIIT5K.csv`` from the
    current working directory, stacks them with a fresh 0..n-1 index, replaces
    missing ``cv2 time`` / ``prediction time`` values with 0 and adds a
    ``time_total`` column holding their sum. No rows are filtered out.

    Returns:
        pandas.DataFrame: the merged results.
    """
    csv_names = ('fluxos_1-4_IIIT5K.csv', 'fluxos_5-8_IIIT5K.csv')
    merged = pd.concat([pd.read_csv(csv_name) for csv_name in csv_names], ignore_index=True)

    # A missing timing is treated as "took no time".
    for timing_column in ('cv2 time', 'prediction time'):
        merged[timing_column] = merged[timing_column].fillna(0)

    merged['time_total'] = merged['cv2 time'] + merged['prediction time']
    return merged


def get_svhn_dataset():
    """Load and merge the SVHN result CSVs.

    Reads ``fluxos_1-4_svhn.csv`` and ``fluxos_5-8_svhn.csv`` from the current
    working directory, stacks them with a fresh 0..n-1 index, replaces missing
    ``cv2 time`` / ``prediction time`` values with 0 and adds a ``time_total``
    column holding their sum. No rows are filtered out.

    Returns:
        pandas.DataFrame: the merged results.
    """
    first_half = pd.read_csv('fluxos_1-4_svhn.csv')
    second_half = pd.read_csv('fluxos_5-8_svhn.csv')
    svhn = pd.concat([first_half, second_half], ignore_index=True)

    # A missing timing is treated as "took no time".
    for timing_column in ('cv2 time', 'prediction time'):
        svhn[timing_column] = svhn[timing_column].fillna(0)

    total_time = svhn['cv2 time'] + svhn['prediction time']
    svhn.loc[:, 'time_total'] = total_time

    return svhn


def get_SROEI_dataset():
    """Load and merge the SROIE result CSVs.

    Reads ``fluxos_1-4_SROIE.csv`` and ``fluxos_5-8_SROIE.csv`` from the
    current working directory (note the files are spelled "SROIE" while this
    function's name is spelled "SROEI"), stacks them with a fresh 0..n-1
    index, replaces missing ``cv2 time`` / ``prediction time`` values with 0
    and adds a ``time_total`` column holding their sum. No rows are filtered
    out.

    Returns:
        pandas.DataFrame: the merged results.
    """
    parts = []
    for csv_name in ('fluxos_1-4_SROIE.csv', 'fluxos_5-8_SROIE.csv'):
        parts.append(pd.read_csv(csv_name))
    receipts = pd.concat(parts, ignore_index=True)

    # A missing timing is treated as "took no time".
    receipts['cv2 time'] = receipts['cv2 time'].fillna(0)
    receipts['prediction time'] = receipts['prediction time'].fillna(0)

    receipts.loc[:, 'time_total'] = receipts['cv2 time'] + receipts['prediction time']

    return receipts

# Helper functions


In [None]:
def get_groupby_results(dataset, experiments_lists, fluxo_list, label=''):
    """Summarise the error metrics per filter for each of the four pipelines.

    For positions 0..3 the rows whose ``fluxo`` column equals
    ``fluxo_list[i]`` are grouped by ``filter`` and the mean and standard
    deviation of ``wer``, ``cer``, ``wil`` and ``wip`` (each suffixed with
    ``label``) are computed. Every summary is tagged with
    ``experiments_lists[i]`` in a ``fluxo`` column.

    Args:
        dataset: DataFrame with ``fluxo``, ``filter`` and the four metric
            columns named ``<metric><label>``.
        experiments_lists: display names written to the ``fluxo`` column of
            the summaries; only the first four entries are used.
        fluxo_list: values matched against ``dataset['fluxo']``; only the
            first four entries are used.
        label: suffix of the metric columns to aggregate (e.g.
            ``'_lowercase'``); empty for the raw metrics.

    Returns:
        tuple: four DataFrames, one per position, with columns ``filter_``,
        ``<metric><label>_mean`` / ``<metric><label>_std`` for each metric,
        and ``fluxo``.

    Raises:
        IndexError: if either list has fewer than four entries.
    """
    return tuple(
        _summarize_metrics_by_filter(dataset, fluxo_list[position], experiments_lists[position], label)
        for position in range(4)
    )


def _summarize_metrics_by_filter(dataset, fluxo, experiment_name, label):
    """Aggregate mean/std of the four metrics per filter for one pipeline."""
    pipeline_rows = dataset[dataset['fluxo'] == fluxo]

    aggregations = {
        metric + label: ['mean', 'std']
        for metric in ('wer', 'cer', 'wil', 'wip')
    }
    summary = pipeline_rows.groupby('filter').agg(aggregations).reset_index()

    # Flatten the two-level header: ('wer', 'mean') -> 'wer_mean'. The group
    # key has an empty second level, so it comes out as 'filter_'.
    summary.columns = ['_'.join(col).strip() for col in summary.columns.values]
    summary['fluxo'] = experiment_name
    return summary


def get_groupby_timeout(dataset, experiments_lists, fluxo_list):
    """Summarise the total processing time per filter for each of the four pipelines.

    For positions 0..3 the rows whose ``fluxo`` column equals
    ``fluxo_list[i]`` are grouped by ``filter`` and the mean and standard
    deviation of ``time_total`` are computed. Every summary is tagged with
    ``experiments_lists[i]`` in a ``fluxo`` column.

    Args:
        dataset: DataFrame with ``fluxo``, ``filter`` and ``time_total``
            columns.
        experiments_lists: display names written to the ``fluxo`` column of
            the summaries; only the first four entries are used.
        fluxo_list: values matched against ``dataset['fluxo']``; only the
            first four entries are used.

    Returns:
        tuple: four DataFrames, one per position, with columns ``filter_``,
        ``time_total_mean``, ``time_total_std`` and ``fluxo``.

    Raises:
        IndexError: if either list has fewer than four entries.
    """
    return tuple(
        _summarize_time_by_filter(dataset, fluxo_list[position], experiments_lists[position])
        for position in range(4)
    )


def _summarize_time_by_filter(dataset, fluxo, experiment_name):
    """Aggregate mean/std of ``time_total`` per filter for one pipeline."""
    pipeline_rows = dataset[dataset['fluxo'] == fluxo]

    summary = pipeline_rows.groupby('filter').agg({'time_total': ['mean', 'std']}).reset_index()

    # Flatten the two-level header: ('time_total', 'mean') -> 'time_total_mean'.
    # The group key has an empty second level, so it comes out as 'filter_'.
    summary.columns = ['_'.join(col).strip() for col in summary.columns.values]
    summary['fluxo'] = experiment_name
    return summary

## Results with predictions and processed labels

In [None]:
def get_process_dataset_lowercase(dataset, label):
    """Return a lower-cased copy of the label and prediction texts.

    Rows whose ``label`` column is missing are dropped. Missing predictions
    become empty strings. Both ``label`` and ``prediction`` are converted to
    lowercase. The caller's frame is left untouched.

    Args:
        dataset: DataFrame containing a ``prediction`` column and the column
            named by ``label``.
        label: name of the ground-truth text column.

    Returns:
        pandas.DataFrame: an independent frame with the cleaned columns; the
        index labels of the surviving rows are preserved.
    """
    # Explicit copy: the writes below must not target a frame derived from the
    # caller's data.
    cleaned = dataset.dropna(subset=[label]).copy()

    cleaned['prediction'] = cleaned['prediction'].fillna('').str.lower()
    # No fillna needed for the label: its missing rows were dropped above.
    cleaned[label] = cleaned[label].str.lower()

    return cleaned

def get_metrics_lowercase(data_df, suffix_value, label):
    """Score every row's prediction against its label with the jiwer metrics.

    For each row, ``wer``, ``cer``, ``wil``, ``wip`` and ``mer`` are called
    with the row's ``label`` text as reference and its ``prediction`` text as
    hypothesis; each result is stored in the column ``<metric><suffix_value>``.

    Args:
        data_df: DataFrame with a ``prediction`` column and the column named
            by ``label``. It is modified in place.
        suffix_value: suffix appended to each metric name to build the output
            column names (e.g. ``'_lowercase'``).
        label: name of the ground-truth text column.

    Returns:
        pandas.DataFrame: the same ``data_df`` object, with the metric columns
        added.
    """
    scorers = (('wer', wer), ('cer', cer), ('wil', wil), ('wip', wip), ('mer', mer))

    for row_id, record in data_df.iterrows():
        reference = record[label]
        hypothesis = record['prediction']
        for metric_name, scorer in scorers:
            data_df.at[row_id, metric_name + suffix_value] = scorer(reference, hypothesis)

    return data_df


def get_process_dataset_without_symbols(dataset, label):
    """Return a copy with punctuation/special characters stripped from the texts.

    Rows whose ``label`` column is missing are dropped. Every character that
    is neither a word character nor whitespace (regex ``[^\\w\\s]``) is
    removed from both ``label`` and ``prediction``; missing predictions become
    empty strings first. The caller's frame is left untouched.

    Args:
        dataset: DataFrame containing a ``prediction`` column and the column
            named by ``label``.
        label: name of the ground-truth text column.

    Returns:
        pandas.DataFrame: an independent frame with the cleaned columns; the
        index labels of the surviving rows are preserved.
    """
    # Explicit copy: the writes below must not target a frame derived from the
    # caller's data.
    cleaned = dataset.dropna(subset=[label]).copy()

    cleaned[label] = cleaned[label].str.replace(r'[^\w\s]', '', regex=True)
    cleaned['prediction'] = cleaned['prediction'].fillna('').str.replace(r'[^\w\s]', '', regex=True)

    # ``.str.replace`` yields NaN for non-string entries, so drop any label
    # that became missing during cleaning; copy again so the returned frame is
    # safe for the caller to modify.
    return cleaned.dropna(subset=[label]).copy()

### Validating Regex

In [None]:
import re

# Sanity check for the symbol-stripping pattern used by
# get_process_dataset_without_symbols: only word characters and whitespace
# should survive.
# Raw string: the sample contains "\/", which is not a valid escape sequence
# in a normal string literal. The runtime value is unchanged.
text = r'OLAS, asdf23 4 # $% @\/;., * +asdfEFA9*((%&)) ;.=-'
cleaned_text = re.sub(r'[^\w\s]', '', text)
print(cleaned_text)

OLAS asdf23 4     asdfEFA9 


In [None]:
# Disable pandas' chained-assignment (SettingWithCopy) warning for the rest of
# the session; the helper functions above assign columns on derived frames.
pd.options.mode.chained_assignment = None


# BID - IDENTITY CARD

In [None]:
# Load the merged BID results and display the frame for a visual check.
result_filtered_BID = get_bid_dataset()
result_filtered_BID

Unnamed: 0,path,filename,filter,prediction,cv2 time,prediction time,words,wer,cer,wil,wip,mer,fluxo,time_total
0,/content/crop_craft-images/CNH_Frente,res_00003604_in_01.jpg,original,{RE2OGECà],0.000000,0.027089,REPUBLICA,1.0,0.888889,1.0,0.0,1.0,OpenCV[todos preproc] -> EASYOCR,0.027089
1,/content/crop_craft-images/CNH_Frente,res_00003604_in_01.jpg,image_full_gray,R2202sca,0.000032,0.022099,REPUBLICA,1.0,0.888889,1.0,0.0,1.0,OpenCV[todos preproc] -> EASYOCR,0.022131
2,/content/crop_craft-images/CNH_Frente,res_00003604_in_01.jpg,limiarização_de_otsu,REFTGG ch,0.000020,0.022274,REPUBLICA,2.0,0.777778,1.0,0.0,1.0,OpenCV[todos preproc] -> EASYOCR,0.022293
3,/content/crop_craft-images/CNH_Frente,res_00003604_in_01.jpg,limiarização_binária,202t648224,0.000004,0.045176,REPUBLICA,1.0,1.111111,1.0,0.0,1.0,OpenCV[todos preproc] -> EASYOCR,0.045180
4,/content/crop_craft-images/CNH_Frente,res_00003604_in_01.jpg,bilateral,Kaca,0.001022,0.046842,REPUBLICA,1.0,1.000000,1.0,0.0,1.0,OpenCV[todos preproc] -> EASYOCR,0.047864
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37730,/content/crop_craft-images/CPF_Frente,res_00010904_in_26.jpg,original,24/06/1965,0.000000,0.176564,24/06/1965,0.0,0.000000,0.0,1.0,0.0,CRAFT -> ESRGAN -> OpenCV -> Tesseract,0.176564
37731,/content/crop_craft-images/CPF_Frente,res_00010904_in_26.jpg,image_full_gray,pos MAO Lo TM RS Tejo),0.000161,0.137259,24/06/1965,6.0,2.200000,1.0,0.0,1.0,CRAFT -> ESRGAN -> OpenCV -> Tesseract,0.137419
37732,/content/crop_craft-images/CPF_Frente,res_00010904_in_26.jpg,limiarização_de_otsu,pos MAO Lo TM RS Tejo),0.000252,0.137163,24/06/1965,6.0,2.200000,1.0,0.0,1.0,CRAFT -> ESRGAN -> OpenCV -> Tesseract,0.137415
37733,/content/crop_craft-images/CPF_Frente,res_00010904_in_26.jpg,limiarização_binária,,0.000027,0.102696,24/06/1965,1.0,1.000000,1.0,0.0,1.0,CRAFT -> ESRGAN -> OpenCV -> Tesseract,0.102723


## Default - No processing

In [None]:
# Baseline: aggregate the metrics already stored in the CSVs, without any
# text normalisation.
result_filtered_BID = get_bid_dataset()

# Display names written to the 'fluxo' column of the summary tables.
exp_list = ['CRAFT -> OpenCV[todos preproc] -> EASYOCR',
            'CRAFT -> OpenCV[todos preproc] -> Tesseract',
            'CRAFT -> ESRGAN -> OpenCV -> EASYOCR',
            'CRAFT -> ESRGAN -> OpenCV -> Tesseract']

# Values matched against the dataset's 'fluxo' column, in the same order as
# exp_list.
fluxo_list = ['OpenCV[todos preproc] -> EASYOCR',
              'OpenCV[todos preproc] -> Tesseract',
              'CRAFT -> ESRGAN -> OpenCV -> EASYOCR',
              'CRAFT -> ESRGAN -> OpenCV -> Tesseract']

# Per-filter mean/std of the unsuffixed metric columns and of time_total,
# one frame per pipeline.
opencv_easyocr_result_df, opencv_tesseract_result, ersgan_opencv_easyocr_result, ersgan_opencv_tesseract_result = get_groupby_results(result_filtered_BID, exp_list, fluxo_list)
opencv_easyocr_time_total, opencv_tesseract_time_total, ersgan_opencv_easyocr_time_total, ersgan_opencv_tesseract_time_total  = get_groupby_timeout(result_filtered_BID, exp_list, fluxo_list)

# Stack the four per-pipeline summaries into one table each.
result_identity_card = pd.concat([opencv_easyocr_result_df, opencv_tesseract_result, ersgan_opencv_easyocr_result, ersgan_opencv_tesseract_result], ignore_index=True)
result_identity_card_time_total = pd.concat([opencv_easyocr_time_total, opencv_tesseract_time_total, ersgan_opencv_easyocr_time_total, ersgan_opencv_tesseract_time_total], ignore_index=True)

# Show both tables with a colour gradient over the numeric cells.
display(result_identity_card.style.background_gradient(cmap='coolwarm'))
display(result_identity_card_time_total.style.background_gradient(cmap='coolwarm'))

Unnamed: 0,filter_,wer_mean,wer_std,cer_mean,cer_std,wil_mean,wil_std,wip_mean,wip_std,fluxo
0,bilateral,0.756998,0.549599,0.559238,0.445395,0.71352,0.451922,0.28648,0.451922,CRAFT -> OpenCV[todos preproc] -> EASYOCR
1,image_full_gray,0.506849,0.533263,0.296577,0.394953,0.488237,0.499004,0.511763,0.499004,CRAFT -> OpenCV[todos preproc] -> EASYOCR
2,limiarização_binária,0.784395,0.505011,0.59078,0.454271,0.749256,0.433227,0.250744,0.433227,CRAFT -> OpenCV[todos preproc] -> EASYOCR
3,limiarização_de_otsu,0.834127,0.472141,0.684366,0.448957,0.804348,0.396257,0.195652,0.396257,CRAFT -> OpenCV[todos preproc] -> EASYOCR
4,original,0.488088,0.534429,0.282475,0.387253,0.469029,0.498442,0.530971,0.498442,CRAFT -> OpenCV[todos preproc] -> EASYOCR
5,bilateral,0.89994,0.33221,0.859554,0.337986,0.891007,0.311724,0.108993,0.311724,CRAFT -> OpenCV[todos preproc] -> Tesseract
6,image_full_gray,0.847826,0.411667,0.75488,0.415486,0.827923,0.37508,0.172077,0.37508,CRAFT -> OpenCV[todos preproc] -> Tesseract
7,limiarização_binária,0.817749,0.47852,0.675171,0.459704,0.782807,0.410325,0.217193,0.410325,CRAFT -> OpenCV[todos preproc] -> Tesseract
8,limiarização_de_otsu,0.993746,0.494417,0.884777,0.341119,0.907336,0.289112,0.092664,0.289112,CRAFT -> OpenCV[todos preproc] -> Tesseract
9,original,0.84187,0.430695,0.742225,0.421993,0.818989,0.383014,0.181011,0.383014,CRAFT -> OpenCV[todos preproc] -> Tesseract


Unnamed: 0,filter_,time_total_mean,time_total_std,fluxo
0,bilateral,0.01158,0.005168,CRAFT -> OpenCV[todos preproc] -> EASYOCR
1,image_full_gray,0.012855,0.003958,CRAFT -> OpenCV[todos preproc] -> EASYOCR
2,limiarização_binária,0.011569,0.005484,CRAFT -> OpenCV[todos preproc] -> EASYOCR
3,limiarização_de_otsu,0.010267,0.00539,CRAFT -> OpenCV[todos preproc] -> EASYOCR
4,original,0.014121,0.010026,CRAFT -> OpenCV[todos preproc] -> EASYOCR
5,bilateral,0.098496,0.004405,CRAFT -> OpenCV[todos preproc] -> Tesseract
6,image_full_gray,0.099411,0.0048,CRAFT -> OpenCV[todos preproc] -> Tesseract
7,limiarização_binária,0.099923,0.004382,CRAFT -> OpenCV[todos preproc] -> Tesseract
8,limiarização_de_otsu,0.099927,0.005276,CRAFT -> OpenCV[todos preproc] -> Tesseract
9,original,0.100483,0.005211,CRAFT -> OpenCV[todos preproc] -> Tesseract


## Texts in lowercase

In [None]:
# Display names written to the 'fluxo' column of the summary table.
exp_list = ['CRAFT -> OpenCV[todos preproc] -> EASYOCR',
            'CRAFT -> OpenCV[todos preproc] -> Tesseract',
            'CRAFT -> ESRGAN -> OpenCV -> EASYOCR',
            'CRAFT -> ESRGAN -> OpenCV -> Tesseract']

# Values matched against the dataset's 'fluxo' column, in the same order as
# exp_list.
fluxo_list = ['OpenCV[todos preproc] -> EASYOCR',
              'OpenCV[todos preproc] -> Tesseract',
              'CRAFT -> ESRGAN -> OpenCV -> EASYOCR',
              'CRAFT -> ESRGAN -> OpenCV -> Tesseract']


# With the texts converted to lowercase
result_lowercase_BID = get_bid_dataset()

# Lower-case labels ('words') and predictions before scoring.
result_lowercase_BID = get_process_dataset_lowercase(result_lowercase_BID, 'words')

# Recompute the metrics row by row into '<metric>_lowercase' columns.
result_lowercase_BID = get_metrics_lowercase(result_lowercase_BID, '_lowercase', 'words')

# Per-filter mean/std of the '_lowercase' metrics, one frame per pipeline.
opencv_easyocr_result_df, opencv_tesseract_result, ersgan_opencv_easyocr_result, ersgan_opencv_tesseract_result = get_groupby_results(result_lowercase_BID, exp_list, fluxo_list, '_lowercase')

result_identity_card_lowercase = pd.concat([opencv_easyocr_result_df, opencv_tesseract_result, ersgan_opencv_easyocr_result, ersgan_opencv_tesseract_result], ignore_index=True)

# Show the table with a colour gradient over the numeric cells.
display(result_identity_card_lowercase.style.background_gradient(cmap='coolwarm'))

Unnamed: 0,filter_,wer_lowercase_mean,wer_lowercase_std,cer_lowercase_mean,cer_lowercase_std,wil_lowercase_mean,wil_lowercase_std,wip_lowercase_mean,wip_lowercase_std,fluxo
0,bilateral,0.718117,0.566228,0.490908,0.446716,0.674613,0.468341,0.325387,0.468341,CRAFT -> OpenCV[todos preproc] -> EASYOCR
1,image_full_gray,0.455304,0.531449,0.227296,0.346151,0.436681,0.495107,0.563319,0.495107,CRAFT -> OpenCV[todos preproc] -> EASYOCR
2,limiarização_binária,0.760429,0.517835,0.548457,0.449113,0.725268,0.446178,0.274732,0.446178,CRAFT -> OpenCV[todos preproc] -> EASYOCR
3,limiarização_de_otsu,0.810191,0.485813,0.638639,0.44817,0.780989,0.412797,0.219011,0.412797,CRAFT -> OpenCV[todos preproc] -> EASYOCR
4,original,0.437128,0.529745,0.2039,0.322613,0.418355,0.492529,0.581645,0.492529,CRAFT -> OpenCV[todos preproc] -> EASYOCR
5,bilateral,0.899285,0.333016,0.857452,0.338778,0.890346,0.312552,0.109654,0.312552,CRAFT -> OpenCV[todos preproc] -> Tesseract
6,image_full_gray,0.845352,0.413775,0.749576,0.416442,0.825437,0.377239,0.174563,0.377239,CRAFT -> OpenCV[todos preproc] -> Tesseract
7,limiarização_binária,0.810489,0.483315,0.651867,0.452945,0.775526,0.415251,0.224474,0.415251,CRAFT -> OpenCV[todos preproc] -> Tesseract
8,limiarização_de_otsu,0.992551,0.495752,0.876383,0.338899,0.906089,0.290861,0.093911,0.290861,CRAFT -> OpenCV[todos preproc] -> Tesseract
9,original,0.839392,0.432688,0.735725,0.422179,0.816498,0.385075,0.183502,0.385075,CRAFT -> OpenCV[todos preproc] -> Tesseract


## Lowercase texts with special characters and punctuation removed

In [None]:
import numpy as np

# Display names written to the 'fluxo' column of the summary table.
exp_list = ['CRAFT -> OpenCV[todos preproc] -> EASYOCR',
            'CRAFT -> OpenCV[todos preproc] -> Tesseract',
            'CRAFT -> ESRGAN -> OpenCV -> EASYOCR',
            'CRAFT -> ESRGAN -> OpenCV -> Tesseract']

# Values matched against the dataset's 'fluxo' column, in the same order as
# exp_list.
fluxo_list = ['OpenCV[todos preproc] -> EASYOCR',
              'OpenCV[todos preproc] -> Tesseract',
              'CRAFT -> ESRGAN -> OpenCV -> EASYOCR',
              'CRAFT -> ESRGAN -> OpenCV -> Tesseract']


# With the texts lower-cased and stripped of punctuation/special characters
result_lowercase_BID = get_bid_dataset()

result_lowercase_BID = get_process_dataset_lowercase(result_lowercase_BID, 'words')
result_lowercase_BID = get_process_dataset_without_symbols(result_lowercase_BID, 'words')

# Discard labels left with no reference text after stripping. The test uses
# strip() because ''.isspace() is False, so a label reduced to the empty
# string would otherwise slip through to the scoring step.
result_lowercase_BID['words'] = result_lowercase_BID['words'].apply(lambda x: x if x.strip() else None)
result_lowercase_BID.dropna(subset=['words'], inplace=True)

# Recompute the metrics row by row into '<metric>_no_schar' columns.
result_lowercase_BID = get_metrics_lowercase(result_lowercase_BID, '_no_schar', 'words')

# Per-filter mean/std of the '_no_schar' metrics, one frame per pipeline.
opencv_easyocr_result_df, opencv_tesseract_result, ersgan_opencv_easyocr_result, ersgan_opencv_tesseract_result = get_groupby_results(result_lowercase_BID, exp_list, fluxo_list, '_no_schar')

result_identity_card_lowercase_no_special = pd.concat([opencv_easyocr_result_df, opencv_tesseract_result, ersgan_opencv_easyocr_result, ersgan_opencv_tesseract_result], ignore_index=True)

# Show the table with a colour gradient over the numeric cells.
display(result_identity_card_lowercase_no_special.style.background_gradient(cmap='coolwarm'))

Unnamed: 0,filter_,wer_no_schar_mean,wer_no_schar_std,cer_no_schar_mean,cer_no_schar_std,wil_no_schar_mean,wil_no_schar_std,wip_no_schar_mean,wip_no_schar_std,fluxo
0,bilateral,0.670644,0.557283,0.480576,0.453708,0.637828,0.480461,0.362172,0.480461,CRAFT -> OpenCV[todos preproc] -> EASYOCR
1,image_full_gray,0.387232,0.503828,0.20728,0.34364,0.378431,0.484563,0.621569,0.484563,CRAFT -> OpenCV[todos preproc] -> EASYOCR
2,limiarização_binária,0.707637,0.509458,0.52354,0.448943,0.686158,0.464192,0.313842,0.464192,CRAFT -> OpenCV[todos preproc] -> EASYOCR
3,limiarização_de_otsu,0.765215,0.484914,0.618281,0.450229,0.745823,0.435184,0.254177,0.435184,CRAFT -> OpenCV[todos preproc] -> EASYOCR
4,original,0.36963,0.497315,0.182213,0.316255,0.362172,0.480461,0.637828,0.480461,CRAFT -> OpenCV[todos preproc] -> EASYOCR
5,bilateral,0.882458,0.35056,0.853875,0.347647,0.874105,0.33183,0.125895,0.33183,CRAFT -> OpenCV[todos preproc] -> Tesseract
6,image_full_gray,0.800716,0.436362,0.736545,0.429369,0.787739,0.40798,0.212261,0.40798,CRAFT -> OpenCV[todos preproc] -> Tesseract
7,limiarização_binária,0.757757,0.498145,0.635224,0.465413,0.731504,0.442636,0.268496,0.442636,CRAFT -> OpenCV[todos preproc] -> Tesseract
8,limiarização_de_otsu,0.978819,0.501043,0.869481,0.341162,0.89464,0.30671,0.10536,0.30671,CRAFT -> OpenCV[todos preproc] -> Tesseract
9,original,0.785203,0.447996,0.719201,0.437283,0.771629,0.418885,0.228371,0.418885,CRAFT -> OpenCV[todos preproc] -> Tesseract


# IIIT5K

In [None]:
# Load the merged IIIT5K results and preview the first 50 rows.
result_filtered_IIIT5K = get_IIIT5K_dataset()
result_filtered_IIIT5K.head(50)

Unnamed: 0,path,filename,filter,prediction,cv2 time,prediction time,words,wer,cer,wil,wip,mer,fluxo,time_total
0,/content/IIIT5K/IIIT5K/test/,1002_1.png,original,PRIVATE,0.0,0.122835,PRIVATE,0.0,0.0,0.0,1.0,0.0,OpenCV[todos preproc] -> EASYOCR,0.122835
1,/content/IIIT5K/IIIT5K/test/,1002_1.png,image_full_gray,PRIVATE,3e-05,0.014177,PRIVATE,0.0,0.0,0.0,1.0,0.0,OpenCV[todos preproc] -> EASYOCR,0.014207
2,/content/IIIT5K/IIIT5K/test/,1002_1.png,limiarização_de_otsu,PRIVATE,4.4e-05,0.013822,PRIVATE,0.0,0.0,0.0,1.0,0.0,OpenCV[todos preproc] -> EASYOCR,0.013867
3,/content/IIIT5K/IIIT5K/test/,1002_1.png,limiarização_binária,PRIVATE,5e-06,0.013535,PRIVATE,0.0,0.0,0.0,1.0,0.0,OpenCV[todos preproc] -> EASYOCR,0.01354
4,/content/IIIT5K/IIIT5K/test/,1002_1.png,bilateral,PRIVATE,0.001289,0.013451,PRIVATE,0.0,0.0,0.0,1.0,0.0,OpenCV[todos preproc] -> EASYOCR,0.01474
5,/content/IIIT5K/IIIT5K/test/,1002_2.png,original,PARKING,0.0,0.092546,PARKING,0.0,0.0,0.0,1.0,0.0,OpenCV[todos preproc] -> EASYOCR,0.092546
6,/content/IIIT5K/IIIT5K/test/,1002_2.png,image_full_gray,PARKING,2.9e-05,0.014146,PARKING,0.0,0.0,0.0,1.0,0.0,OpenCV[todos preproc] -> EASYOCR,0.014175
7,/content/IIIT5K/IIIT5K/test/,1002_2.png,limiarização_de_otsu,PARKING,2e-05,0.013732,PARKING,0.0,0.0,0.0,1.0,0.0,OpenCV[todos preproc] -> EASYOCR,0.013752
8,/content/IIIT5K/IIIT5K/test/,1002_2.png,limiarização_binária,PARKING,5e-06,0.013546,PARKING,0.0,0.0,0.0,1.0,0.0,OpenCV[todos preproc] -> EASYOCR,0.013551
9,/content/IIIT5K/IIIT5K/test/,1002_2.png,bilateral,PARKING,0.001086,0.013319,PARKING,0.0,0.0,0.0,1.0,0.0,OpenCV[todos preproc] -> EASYOCR,0.014405


## Default - No processing

In [None]:
# Baseline: aggregate the metrics already stored in the CSVs, without any
# text normalisation.
result_filtered_IIIT5K = get_IIIT5K_dataset()

# Display names written to the 'fluxo' column of the summary tables.
exp_list = ['OpenCV[todos preproc] -> EASYOCR',
            'OpenCV[todos preproc] -> Tesseract',
            'ESRGAN -> OpenCV -> EASYOCR',
            'ESRGAN -> OpenCV -> Tesseract']

# Values matched against the dataset's 'fluxo' column, in the same order as
# exp_list.
fluxo_list = ['OpenCV[todos preproc] -> EASYOCR',
              'OpenCV[todos preproc] -> Tesseract',
              'ESRGAN -> OpenCV -> EASYOCR',
              'ESRGAN -> OpenCV -> Tesseract']

# Per-filter mean/std of the unsuffixed metric columns, one frame per pipeline.
opencv_easyocr_result_df, opencv_tesseract_result, ersgan_opencv_easyocr_result, ersgan_opencv_tesseract_result = get_groupby_results(result_filtered_IIIT5K, exp_list, fluxo_list)

# Per-filter mean/std of time_total, one frame per pipeline.
opencv_easyocr_time_total, opencv_tesseract_time_total, ersgan_opencv_easyocr_time_total, ersgan_opencv_tesseract_time_total  = get_groupby_timeout(result_filtered_IIIT5K, exp_list, fluxo_list)

# Stack the four per-pipeline summaries into one table each.
result_IIIT5K = pd.concat([opencv_easyocr_result_df, opencv_tesseract_result, ersgan_opencv_easyocr_result, ersgan_opencv_tesseract_result], ignore_index=True)
result_IIIT5K_time_total = pd.concat([opencv_easyocr_time_total, opencv_tesseract_time_total, ersgan_opencv_easyocr_time_total, ersgan_opencv_tesseract_time_total], ignore_index=True)

# Show both tables with a colour gradient over the numeric cells.
display(result_IIIT5K.style.background_gradient(cmap='coolwarm'))
display(result_IIIT5K_time_total.style.background_gradient(cmap='coolwarm'))

Unnamed: 0,filter_,wer_mean,wer_std,cer_mean,cer_std,wil_mean,wil_std,wip_mean,wip_std,fluxo
0,bilateral,0.809667,0.693494,0.612048,0.435014,0.734833,0.441212,0.265167,0.441212,OpenCV[todos preproc] -> EASYOCR
1,image_full_gray,0.753333,0.720645,0.536044,0.450458,0.676333,0.467596,0.323667,0.467596,OpenCV[todos preproc] -> EASYOCR
2,limiarização_binária,0.838,0.658205,0.639187,0.432689,0.766667,0.422431,0.233333,0.422431,OpenCV[todos preproc] -> EASYOCR
3,limiarização_de_otsu,0.780667,0.673333,0.570393,0.439732,0.716833,0.450334,0.283167,0.450334,OpenCV[todos preproc] -> EASYOCR
4,original,0.755,0.715411,0.535694,0.450316,0.678167,0.46699,0.321833,0.46699,OpenCV[todos preproc] -> EASYOCR
5,bilateral,1.009333,0.536043,0.893821,0.388905,0.908222,0.288054,0.091778,0.288054,OpenCV[todos preproc] -> Tesseract
6,image_full_gray,1.009667,0.615656,0.878848,0.434388,0.890556,0.310791,0.109444,0.310791,OpenCV[todos preproc] -> Tesseract
7,limiarização_binária,1.037,0.592521,0.916098,0.455397,0.920156,0.269937,0.079844,0.269937,OpenCV[todos preproc] -> Tesseract
8,limiarização_de_otsu,1.003333,0.615453,0.871599,0.434161,0.888933,0.313385,0.111067,0.313385,OpenCV[todos preproc] -> Tesseract
9,original,1.0,0.582048,0.885154,0.478054,0.894056,0.306476,0.105944,0.306476,OpenCV[todos preproc] -> Tesseract


Unnamed: 0,filter_,time_total_mean,time_total_std,fluxo
0,bilateral,0.012939,0.010043,OpenCV[todos preproc] -> EASYOCR
1,image_full_gray,0.012657,0.007229,OpenCV[todos preproc] -> EASYOCR
2,limiarização_binária,0.011957,0.007233,OpenCV[todos preproc] -> EASYOCR
3,limiarização_de_otsu,0.012694,0.007085,OpenCV[todos preproc] -> EASYOCR
4,original,0.016197,0.019089,OpenCV[todos preproc] -> EASYOCR
5,bilateral,0.111453,0.012659,OpenCV[todos preproc] -> Tesseract
6,image_full_gray,0.111863,0.012406,OpenCV[todos preproc] -> Tesseract
7,limiarização_binária,0.109337,0.007384,OpenCV[todos preproc] -> Tesseract
8,limiarização_de_otsu,0.109902,0.007372,OpenCV[todos preproc] -> Tesseract
9,original,0.114535,0.022482,OpenCV[todos preproc] -> Tesseract


## Texts in lowercase

In [None]:
# Display names written to the 'fluxo' column of the summary table.
exp_list = ['OpenCV[todos preproc] -> EASYOCR',
            'OpenCV[todos preproc] -> Tesseract',
            'ESRGAN -> OpenCV -> EASYOCR',
            'ESRGAN -> OpenCV -> Tesseract']

# Values matched against the dataset's 'fluxo' column, in the same order as
# exp_list.
fluxo_list = ['OpenCV[todos preproc] -> EASYOCR',
              'OpenCV[todos preproc] -> Tesseract',
              'ESRGAN -> OpenCV -> EASYOCR',
              'ESRGAN -> OpenCV -> Tesseract']

# With the texts converted to lowercase
result_filtered_IIIT5K = get_IIIT5K_dataset()

# Lower-case labels ('words') and predictions before scoring.
result_filtered_IIIT5K = get_process_dataset_lowercase(result_filtered_IIIT5K, 'words')

# Recompute the metrics row by row into '<metric>_lowercase' columns.
result_filtered_IIIT5K = get_metrics_lowercase(result_filtered_IIIT5K, '_lowercase', 'words')

# Per-filter mean/std of the '_lowercase' metrics, one frame per pipeline.
opencv_easyocr_result_df, opencv_tesseract_result, ersgan_opencv_easyocr_result, ersgan_opencv_tesseract_result = get_groupby_results(result_filtered_IIIT5K, exp_list, fluxo_list, '_lowercase')

result_IIIT5K_lowercase = pd.concat([opencv_easyocr_result_df, opencv_tesseract_result, ersgan_opencv_easyocr_result, ersgan_opencv_tesseract_result], ignore_index=True)

# Show the table with a colour gradient over the numeric cells.
display(result_IIIT5K_lowercase.style.background_gradient(cmap='coolwarm'))

Unnamed: 0,filter_,wer_lowercase_mean,wer_lowercase_std,cer_lowercase_mean,cer_lowercase_std,wil_lowercase_mean,wil_lowercase_std,wip_lowercase_mean,wip_lowercase_std,fluxo
0,bilateral,0.576667,0.75519,0.330963,0.420023,0.502,0.499746,0.498,0.499746,OpenCV[todos preproc] -> EASYOCR
1,image_full_gray,0.453,0.761129,0.198177,0.348941,0.3765,0.483986,0.6235,0.483986,OpenCV[todos preproc] -> EASYOCR
2,limiarização_binária,0.633667,0.726849,0.372418,0.427816,0.562667,0.495468,0.437333,0.495468,OpenCV[todos preproc] -> EASYOCR
3,limiarização_de_otsu,0.517333,0.725172,0.247413,0.362536,0.455,0.497049,0.545,0.497049,OpenCV[todos preproc] -> EASYOCR
4,original,0.451,0.755727,0.198335,0.348412,0.375,0.483514,0.625,0.483514,OpenCV[todos preproc] -> EASYOCR
5,bilateral,0.888,0.624168,0.747313,0.479111,0.788194,0.407478,0.211806,0.407478,OpenCV[todos preproc] -> Tesseract
6,image_full_gray,0.862,0.708842,0.694721,0.520798,0.744111,0.434731,0.255889,0.434731,OpenCV[todos preproc] -> Tesseract
7,limiarização_binária,0.928,0.673028,0.767123,0.531252,0.811878,0.389655,0.188122,0.389655,OpenCV[todos preproc] -> Tesseract
8,limiarização_de_otsu,0.862,0.706958,0.687477,0.516983,0.747933,0.433442,0.252067,0.433442,OpenCV[todos preproc] -> Tesseract
9,original,0.859,0.674738,0.705269,0.560927,0.753972,0.429348,0.246028,0.429348,OpenCV[todos preproc] -> Tesseract


## Lowercase texts with special characters and punctuation removed

In [None]:
# Display names written to the 'fluxo' column of the summary table.
exp_list = ['OpenCV[todos preproc] -> EASYOCR',
            'OpenCV[todos preproc] -> Tesseract',
            'ESRGAN -> OpenCV -> EASYOCR',
            'ESRGAN -> OpenCV -> Tesseract']

# Values matched against the dataset's 'fluxo' column, in the same order as
# exp_list.
fluxo_list = ['OpenCV[todos preproc] -> EASYOCR',
              'OpenCV[todos preproc] -> Tesseract',
              'ESRGAN -> OpenCV -> EASYOCR',
              'ESRGAN -> OpenCV -> Tesseract']

# With the texts lower-cased and stripped of punctuation/special characters
result_filtered_IIIT5K = get_IIIT5K_dataset()

result_filtered_IIIT5K = get_process_dataset_lowercase(result_filtered_IIIT5K, 'words')
result_filtered_IIIT5K = get_process_dataset_without_symbols(result_filtered_IIIT5K, 'words')

# Discard labels left with no reference text after stripping. The test uses
# strip() because ''.isspace() is False, so a label reduced to the empty
# string would otherwise slip through to the scoring step.
result_filtered_IIIT5K['words'] = result_filtered_IIIT5K['words'].apply(lambda x: x if x.strip() else None)
result_filtered_IIIT5K.dropna(subset=['words'], inplace=True)

# Recompute the metrics row by row into '<metric>_no_schar' columns.
result_filtered_IIIT5K = get_metrics_lowercase(result_filtered_IIIT5K, '_no_schar', 'words')

# Per-filter mean/std of the '_no_schar' metrics, one frame per pipeline.
opencv_easyocr_result_df, opencv_tesseract_result, ersgan_opencv_easyocr_result, ersgan_opencv_tesseract_result = get_groupby_results(result_filtered_IIIT5K, exp_list, fluxo_list, '_no_schar')

result_IIIT5K_lowercase_no_special = pd.concat([opencv_easyocr_result_df, opencv_tesseract_result, ersgan_opencv_easyocr_result, ersgan_opencv_tesseract_result], ignore_index=True)

# Show the table with a colour gradient over the numeric cells.
display(result_IIIT5K_lowercase_no_special.style.background_gradient(cmap='coolwarm'))

Unnamed: 0,filter_,wer_no_schar_mean,wer_no_schar_std,cer_no_schar_mean,cer_no_schar_std,wil_no_schar_mean,wil_no_schar_std,wip_no_schar_mean,wip_no_schar_std,fluxo
0,bilateral,0.528333,0.700971,0.315007,0.419076,0.466333,0.498948,0.533667,0.498948,OpenCV[todos preproc] -> EASYOCR
1,image_full_gray,0.386333,0.694676,0.177884,0.341212,0.323667,0.467953,0.676333,0.467953,OpenCV[todos preproc] -> EASYOCR
2,limiarização_binária,0.578667,0.673273,0.350933,0.421087,0.5225,0.499493,0.4775,0.499493,OpenCV[todos preproc] -> EASYOCR
3,limiarização_de_otsu,0.457333,0.665077,0.225934,0.353905,0.407833,0.491259,0.592167,0.491259,OpenCV[todos preproc] -> EASYOCR
4,original,0.386,0.692217,0.178332,0.340659,0.324,0.468078,0.676,0.468078,OpenCV[todos preproc] -> EASYOCR
5,bilateral,0.835,0.621748,0.726825,0.479867,0.746556,0.434539,0.253444,0.434539,OpenCV[todos preproc] -> Tesseract
6,image_full_gray,0.793333,0.712361,0.669528,0.521305,0.686722,0.463194,0.313278,0.463194,OpenCV[todos preproc] -> Tesseract
7,limiarização_binária,0.877,0.672326,0.74799,0.527967,0.768989,0.421102,0.231011,0.421102,OpenCV[todos preproc] -> Tesseract
8,limiarização_de_otsu,0.797,0.711308,0.663176,0.517971,0.692667,0.461285,0.307333,0.461285,OpenCV[todos preproc] -> Tesseract
9,original,0.786,0.666097,0.677895,0.551782,0.693861,0.460284,0.306139,0.460284,OpenCV[todos preproc] -> Tesseract


# SVHN

In [None]:
# Load the SVHN results through get_svhn_dataset() and show the frame as the
# cell output (the notebook renders a truncated head/tail preview).
result_filtered_svhn = get_svhn_dataset()
result_filtered_svhn

Unnamed: 0,label,filter,prediction,cv2 time,prediction time,wer,cer,wil,wip,mer,fluxo,time_total
0,31,original,,0.000000,0.008890,1.0,1.0,1.0,0.0,1.0,OpenCV[todos preproc] -> EASYOCR,0.008890
1,31,image_full_gray,,0.000014,0.005503,1.0,1.0,1.0,0.0,1.0,OpenCV[todos preproc] -> EASYOCR,0.005517
2,31,limiarização_de_otsu,,0.000012,0.005413,1.0,1.0,1.0,0.0,1.0,OpenCV[todos preproc] -> EASYOCR,0.005426
3,31,limiarização_binária,,0.000003,0.005227,1.0,1.0,1.0,0.0,1.0,OpenCV[todos preproc] -> EASYOCR,0.005230
4,31,bilateral,,0.000283,0.005213,1.0,1.0,1.0,0.0,1.0,OpenCV[todos preproc] -> EASYOCR,0.005497
...,...,...,...,...,...,...,...,...,...,...,...,...
261355,183,original,\v,0.000000,0.147386,1.0,1.0,1.0,0.0,1.0,ESRGAN -> OpenCV -> Tesseract,0.147386
261356,183,image_full_gray,vo,0.000178,0.121544,1.0,1.0,1.0,0.0,1.0,ESRGAN -> OpenCV -> Tesseract,0.121722
261357,183,limiarização_de_otsu,vo,0.000185,0.120769,1.0,1.0,1.0,0.0,1.0,ESRGAN -> OpenCV -> Tesseract,0.120954
261358,183,limiarização_binária,b2,0.000026,0.113375,1.0,1.0,1.0,0.0,1.0,ESRGAN -> OpenCV -> Tesseract,0.113401


## Default - No Processing

In [None]:
# Baseline SVHN evaluation: labels and predictions are used as stored,
# with no text normalisation before aggregating.
result_filtered_svhn = get_svhn_dataset()

# Names of the four OCR pipelines ("fluxos") being compared.
exp_list = [
    'OpenCV[todos preproc] -> EASYOCR',
    'OpenCV[todos preproc] -> Tesseract',
    'ESRGAN -> OpenCV -> EASYOCR',
    'ESRGAN -> OpenCV -> Tesseract',
]
# Same four names as an independent list; the helpers take both arguments.
fluxo_list = list(exp_list)

# Error-metric summaries: four frames, unpacked in pipeline order.
(
    opencv_easyocr_result_df,
    opencv_tesseract_result,
    ersgan_opencv_easyocr_result,
    ersgan_opencv_tesseract_result,
) = get_groupby_results(result_filtered_svhn, exp_list, fluxo_list)

# Timing summaries, same order.
(
    opencv_easyocr_time_total,
    opencv_tesseract_time_total,
    ersgan_opencv_easyocr_time_total,
    ersgan_opencv_tesseract_time_total,
) = get_groupby_timeout(result_filtered_svhn, exp_list, fluxo_list)

# Stack the per-pipeline frames into one table each, with a fresh index.
result_SVHN = pd.concat(
    [
        opencv_easyocr_result_df,
        opencv_tesseract_result,
        ersgan_opencv_easyocr_result,
        ersgan_opencv_tesseract_result,
    ],
    ignore_index=True,
)
result_SVHN_time_total = pd.concat(
    [
        opencv_easyocr_time_total,
        opencv_tesseract_time_total,
        ersgan_opencv_easyocr_time_total,
        ersgan_opencv_tesseract_time_total,
    ],
    ignore_index=True,
)

# Metrics first, timings second, both as heat maps.
for summary in (result_SVHN, result_SVHN_time_total):
    display(summary.style.background_gradient(cmap='coolwarm'))

Unnamed: 0,filter_,wer_mean,wer_std,cer_mean,cer_std,wil_mean,wil_std,wip_mean,wip_std,fluxo
0,bilateral,0.907178,0.307853,0.884699,0.407609,0.901617,0.2974,0.098383,0.2974,OpenCV[todos preproc] -> EASYOCR
1,image_full_gray,0.760254,0.454721,0.71676,0.595764,0.747947,0.433491,0.252053,0.433491,OpenCV[todos preproc] -> EASYOCR
2,limiarização_binária,0.917661,0.326785,0.902719,0.438079,0.902861,0.295311,0.097139,0.295311,OpenCV[todos preproc] -> EASYOCR
3,limiarização_de_otsu,0.880931,0.376343,0.86444,0.505122,0.863458,0.342347,0.136542,0.342347,OpenCV[todos preproc] -> EASYOCR
4,original,0.770049,0.450844,0.726925,0.586165,0.757155,0.428112,0.242845,0.428112,OpenCV[todos preproc] -> EASYOCR
5,bilateral,1.037573,0.358836,1.074616,0.676699,0.98854,0.103607,0.01146,0.103607,OpenCV[todos preproc] -> Tesseract
6,image_full_gray,1.056321,0.471305,1.114048,0.817537,0.965865,0.174731,0.034135,0.174731,OpenCV[todos preproc] -> Tesseract
7,limiarização_binária,1.061754,0.498806,1.12147,1.206655,0.979969,0.13595,0.020031,0.13595,OpenCV[todos preproc] -> Tesseract
8,limiarização_de_otsu,1.062825,0.472024,1.122699,0.792456,0.974677,0.151547,0.025323,0.151547,OpenCV[todos preproc] -> Tesseract
9,original,1.065963,0.512277,1.132141,0.984934,0.968728,0.166989,0.031272,0.166989,OpenCV[todos preproc] -> Tesseract


Unnamed: 0,filter_,time_total_mean,time_total_std,fluxo
0,bilateral,0.009659,0.007067,OpenCV[todos preproc] -> EASYOCR
1,image_full_gray,0.009597,0.005471,OpenCV[todos preproc] -> EASYOCR
2,limiarização_binária,0.008102,0.004704,OpenCV[todos preproc] -> EASYOCR
3,limiarização_de_otsu,0.008637,0.005796,OpenCV[todos preproc] -> EASYOCR
4,original,0.010263,0.005786,OpenCV[todos preproc] -> EASYOCR
5,bilateral,0.110595,0.010555,OpenCV[todos preproc] -> Tesseract
6,image_full_gray,0.111083,0.009769,OpenCV[todos preproc] -> Tesseract
7,limiarização_binária,0.107433,0.005644,OpenCV[todos preproc] -> Tesseract
8,limiarização_de_otsu,0.107916,0.005288,OpenCV[todos preproc] -> Tesseract
9,original,0.117117,0.021262,OpenCV[todos preproc] -> Tesseract


Para a base SVHN podemos observar:
-  O fluxo [ESRGAN -> OpenCV -> EASYOCR] teve resultados parecidos para os filtros bilateral, image_full_gray e original.


`>> A base SVHN contém apenas números; logo, não se faz necessário aplicar o lowercase nem o processamento dos caracteres especiais.`

# SROEI

In [None]:
# Load the SROEI results through get_SROEI_dataset() (defined elsewhere in
# this notebook) and show the frame as the cell output.
result_filtered_SROEI = get_SROEI_dataset()
result_filtered_SROEI

Unnamed: 0,label,filter,prediction,cv2 time,prediction time,wer,cer,wil,wip,mer,fluxo,time_total
0,ROUNDING :,original,Rounding,0.000000,0.690343,1.000000,0.900000,1.0,0.0,1.0,OpenCV[todos preproc] -> EASYOCR,0.690343
1,ROUNDING :,image_full_gray,Rounding,0.000064,0.014858,1.000000,0.900000,1.0,0.0,1.0,OpenCV[todos preproc] -> EASYOCR,0.014922
2,ROUNDING :,limiarização_de_otsu,Rounding,0.000062,0.013562,1.000000,0.900000,1.0,0.0,1.0,OpenCV[todos preproc] -> EASYOCR,0.013624
3,ROUNDING :,limiarização_binária,Rounding,0.000005,0.013031,1.000000,0.900000,1.0,0.0,1.0,OpenCV[todos preproc] -> EASYOCR,0.013036
4,ROUNDING :,bilateral,Rounding,0.001008,0.014480,1.000000,0.900000,1.0,0.0,1.0,OpenCV[todos preproc] -> EASYOCR,0.015488
...,...,...,...,...,...,...,...,...,...,...,...,...
374075,"TAMPOI,81200 JOHOR BAHRU,JOHOR",original,"Tampot,8t 200 Johor Bahru, Johor",0.000000,0.413532,1.666667,0.666667,1.0,0.0,1.0,ESRGAN -> OpenCV -> Tesseract,0.413532
374076,"TAMPOI,81200 JOHOR BAHRU,JOHOR",image_full_gray,"Tampot,8t 200 Johor Bahru, Johor",0.000231,0.258180,1.666667,0.666667,1.0,0.0,1.0,ESRGAN -> OpenCV -> Tesseract,0.258411
374077,"TAMPOI,81200 JOHOR BAHRU,JOHOR",limiarização_de_otsu,"Tampot,8t 200 Johor Bahru, Johor",0.000910,0.261692,1.666667,0.666667,1.0,0.0,1.0,ESRGAN -> OpenCV -> Tesseract,0.262601
374078,"TAMPOI,81200 JOHOR BAHRU,JOHOR",limiarização_binária,"Tampot,81t 200 Johor Bahru, Johor",0.000044,0.214300,1.666667,0.666667,1.0,0.0,1.0,ESRGAN -> OpenCV -> Tesseract,0.214344


## Default - No Processing

In [None]:
# Baseline SROEI evaluation: labels and predictions are used as stored,
# with no text normalisation before aggregating.
result_filtered_SROEI = get_SROEI_dataset()

# Names of the four OCR pipelines ("fluxos") being compared.
exp_list = [
    'OpenCV[todos preproc] -> EASYOCR',
    'OpenCV[todos preproc] -> Tesseract',
    'ESRGAN -> OpenCV -> EASYOCR',
    'ESRGAN -> OpenCV -> Tesseract',
]
# Same four names as an independent list; the helpers take both arguments.
fluxo_list = list(exp_list)

# Error-metric summaries: four frames, unpacked in pipeline order.
(
    opencv_easyocr_result_df,
    opencv_tesseract_result,
    ersgan_opencv_easyocr_result,
    ersgan_opencv_tesseract_result,
) = get_groupby_results(result_filtered_SROEI, exp_list, fluxo_list)

# Timing summaries, same order.
(
    opencv_easyocr_time_total,
    opencv_tesseract_time_total,
    ersgan_opencv_easyocr_time_total,
    ersgan_opencv_tesseract_time_total,
) = get_groupby_timeout(result_filtered_SROEI, exp_list, fluxo_list)

# Stack the per-pipeline frames into one table each, with a fresh index.
result_SROEI = pd.concat(
    [
        opencv_easyocr_result_df,
        opencv_tesseract_result,
        ersgan_opencv_easyocr_result,
        ersgan_opencv_tesseract_result,
    ],
    ignore_index=True,
)
result_SROEI_time_total = pd.concat(
    [
        opencv_easyocr_time_total,
        opencv_tesseract_time_total,
        ersgan_opencv_easyocr_time_total,
        ersgan_opencv_tesseract_time_total,
    ],
    ignore_index=True,
)

# Metrics first, timings second, both as heat maps.
for summary in (result_SROEI, result_SROEI_time_total):
    display(summary.style.background_gradient(cmap='coolwarm'))

Unnamed: 0,filter_,wer_mean,wer_std,cer_mean,cer_std,wil_mean,wil_std,wip_mean,wip_std,fluxo
0,bilateral,0.844917,0.524245,0.563081,0.376782,0.788476,0.370703,0.211524,0.370703,OpenCV[todos preproc] -> EASYOCR
1,image_full_gray,0.756585,0.548093,0.479235,0.377626,0.717443,0.41025,0.282557,0.41025,OpenCV[todos preproc] -> EASYOCR
2,limiarização_binária,0.834725,0.512553,0.569122,0.375521,0.787886,0.372747,0.212114,0.372747,OpenCV[todos preproc] -> EASYOCR
3,limiarização_de_otsu,0.793156,0.589239,0.465426,0.37037,0.725747,0.407948,0.274253,0.407948,OpenCV[todos preproc] -> EASYOCR
4,original,0.756889,0.547399,0.479633,0.377363,0.71787,0.409978,0.28213,0.409978,OpenCV[todos preproc] -> EASYOCR
5,bilateral,0.916487,0.393721,0.774165,0.353232,0.896182,0.267867,0.103818,0.267867,OpenCV[todos preproc] -> Tesseract
6,image_full_gray,0.900106,0.385781,0.774497,0.355527,0.88973,0.277265,0.11027,0.277265,OpenCV[todos preproc] -> Tesseract
7,limiarização_binária,0.952325,0.406653,0.79544,0.348349,0.914435,0.242663,0.085565,0.242663,OpenCV[todos preproc] -> Tesseract
8,limiarização_de_otsu,0.999123,0.483806,0.858228,0.345121,0.925919,0.228654,0.074081,0.228654,OpenCV[todos preproc] -> Tesseract
9,original,0.900723,0.386044,0.774356,0.356488,0.890112,0.276661,0.109888,0.276661,OpenCV[todos preproc] -> Tesseract


Unnamed: 0,filter_,time_total_mean,time_total_std,fluxo
0,bilateral,0.016262,0.0088,OpenCV[todos preproc] -> EASYOCR
1,image_full_gray,0.015442,0.007245,OpenCV[todos preproc] -> EASYOCR
2,limiarização_binária,0.016182,0.008608,OpenCV[todos preproc] -> EASYOCR
3,limiarização_de_otsu,0.015834,0.006774,OpenCV[todos preproc] -> EASYOCR
4,original,0.01679,0.012067,OpenCV[todos preproc] -> EASYOCR
5,bilateral,0.114665,0.01568,OpenCV[todos preproc] -> Tesseract
6,image_full_gray,0.113934,0.014239,OpenCV[todos preproc] -> Tesseract
7,limiarização_binária,0.112982,0.01375,OpenCV[todos preproc] -> Tesseract
8,limiarização_de_otsu,0.114693,0.015732,OpenCV[todos preproc] -> Tesseract
9,original,0.116553,0.017491,OpenCV[todos preproc] -> Tesseract


Para a base SROEI podemos observar:
-  O fluxo [ESRGAN -> OpenCV -> EASYOCR] teve resultados parecidos para os filtros bilateral, image_full_gray e original.


## Texts in lowercase

In [None]:
# SROEI evaluation after lowercasing: metrics are recomputed into columns
# carrying the '_lowercase' suffix and then aggregated per pipeline.
result_filtered_SROEI = get_SROEI_dataset()

# Names of the four OCR pipelines ("fluxos") being compared.
exp_list = [
    'OpenCV[todos preproc] -> EASYOCR',
    'OpenCV[todos preproc] -> Tesseract',
    'ESRGAN -> OpenCV -> EASYOCR',
    'ESRGAN -> OpenCV -> Tesseract',
]
# Same four names as an independent list; the helpers take both arguments.
fluxo_list = list(exp_list)

# Lowercase first, then score against the 'label' column.
result_filtered_SROEI = get_metrics_lowercase(
    get_process_dataset_lowercase(result_filtered_SROEI, 'label'),
    '_lowercase',
    'label',
)

# Four result frames, unpacked in pipeline order.
(
    opencv_easyocr_result_df,
    opencv_tesseract_result,
    ersgan_opencv_easyocr_result,
    ersgan_opencv_tesseract_result,
) = get_groupby_results(result_filtered_SROEI, exp_list, fluxo_list, '_lowercase')

result_filtered_SROEI_lowercase = pd.concat(
    [
        opencv_easyocr_result_df,
        opencv_tesseract_result,
        ersgan_opencv_easyocr_result,
        ersgan_opencv_tesseract_result,
    ],
    ignore_index=True,
)

# Show the stacked summary as a heat map.
display(result_filtered_SROEI_lowercase.style.background_gradient(cmap='viridis'))

Unnamed: 0,filter_,wer_lowercase_mean,wer_lowercase_std,cer_lowercase_mean,cer_lowercase_std,wil_lowercase_mean,wil_lowercase_std,wip_lowercase_mean,wip_lowercase_std,fluxo
0,bilateral,0.724593,0.56985,0.421797,0.386348,0.672615,0.416565,0.327385,0.416565,OpenCV[todos preproc] -> EASYOCR
1,image_full_gray,0.612493,0.578227,0.325241,0.360662,0.576871,0.437465,0.423129,0.437465,OpenCV[todos preproc] -> EASYOCR
2,limiarização_binária,0.721514,0.555889,0.435632,0.384579,0.678493,0.417691,0.321507,0.417691,OpenCV[todos preproc] -> EASYOCR
3,limiarização_de_otsu,0.649051,0.625344,0.308496,0.344223,0.586527,0.438533,0.413473,0.438533,OpenCV[todos preproc] -> EASYOCR
4,original,0.612805,0.577598,0.32547,0.360524,0.57729,0.437362,0.42271,0.437362,OpenCV[todos preproc] -> EASYOCR
5,bilateral,0.772287,0.49942,0.636499,0.461315,0.751344,0.398477,0.248656,0.398477,OpenCV[todos preproc] -> Tesseract
6,image_full_gray,0.75122,0.495449,0.637708,0.465923,0.736799,0.411331,0.263201,0.411331,OpenCV[todos preproc] -> Tesseract
7,limiarização_binária,0.831528,0.501455,0.668186,0.449616,0.797272,0.366124,0.202728,0.366124,OpenCV[todos preproc] -> Tesseract
8,limiarização_de_otsu,0.895667,0.568947,0.75299,0.446568,0.822371,0.352838,0.177629,0.352838,OpenCV[todos preproc] -> Tesseract
9,original,0.751129,0.496111,0.637209,0.466745,0.736542,0.411377,0.263458,0.411377,OpenCV[todos preproc] -> Tesseract


## Texts in lowercase and removed special characters and punctuations

In [None]:
# SROEI evaluation on lowercased labels with special characters and
# punctuation removed; metrics go into columns with the '_no_schar' suffix.
result_filtered_SROEI = get_SROEI_dataset()

# Names of the four OCR pipelines ("fluxos") being compared.
exp_list = ['OpenCV[todos preproc] -> EASYOCR',
            'OpenCV[todos preproc] -> Tesseract',
            'ESRGAN -> OpenCV -> EASYOCR',
            'ESRGAN -> OpenCV -> Tesseract']

fluxo_list = ['OpenCV[todos preproc] -> EASYOCR',
              'OpenCV[todos preproc] -> Tesseract',
              'ESRGAN -> OpenCV -> EASYOCR',
              'ESRGAN -> OpenCV -> Tesseract']

result_filtered_SROEI = get_process_dataset_lowercase(result_filtered_SROEI, 'label')
result_filtered_SROEI = get_process_dataset_without_symbols(result_filtered_SROEI, 'label')

# Symbol removal can leave a label empty or whitespace-only. One regex pass
# marks both cases as missing. A separate x.isspace() pass is redundant with
# it and would raise AttributeError on any non-string label.
result_filtered_SROEI['label'] = result_filtered_SROEI['label'].replace(r'^\s*$', pd.NA, regex=True)
# .copy() hands the metrics step an independent frame rather than a filtered
# slice (presumably it adds columns - confirm against get_metrics_lowercase).
result_filtered_SROEI = result_filtered_SROEI.dropna(subset=['label']).copy()

result_filtered_SROEI = get_metrics_lowercase(result_filtered_SROEI, '_no_schar', 'label')

# Four result frames, unpacked in pipeline order.
(
    opencv_easyocr_result_df,
    opencv_tesseract_result,
    ersgan_opencv_easyocr_result,
    ersgan_opencv_tesseract_result,
) = get_groupby_results(result_filtered_SROEI, exp_list, fluxo_list, '_no_schar')

result_filtered_SROEI_lowercase_no_special = pd.concat(
    [
        opencv_easyocr_result_df,
        opencv_tesseract_result,
        ersgan_opencv_easyocr_result,
        ersgan_opencv_tesseract_result,
    ],
    ignore_index=True,
)

# Show the stacked summary as a heat map.
display(result_filtered_SROEI_lowercase_no_special.style.background_gradient(cmap='viridis'))

Unnamed: 0,filter_,wer_no_schar_mean,wer_no_schar_std,cer_no_schar_mean,cer_no_schar_std,wil_no_schar_mean,wil_no_schar_std,wip_no_schar_mean,wip_no_schar_std,fluxo
0,bilateral,0.628132,0.569398,0.393286,0.394682,0.583927,0.447386,0.416073,0.447386,OpenCV[todos preproc] -> EASYOCR
1,image_full_gray,0.501563,0.556596,0.295928,0.366735,0.473031,0.450557,0.526969,0.450557,OpenCV[todos preproc] -> EASYOCR
2,limiarização_binária,0.629108,0.547685,0.406735,0.394438,0.596321,0.447031,0.403679,0.447031,OpenCV[todos preproc] -> EASYOCR
3,limiarização_de_otsu,0.536818,0.59358,0.28288,0.352019,0.488935,0.452062,0.511065,0.452062,OpenCV[todos preproc] -> EASYOCR
4,original,0.501898,0.556187,0.296095,0.36657,0.473455,0.450475,0.526545,0.450475,OpenCV[todos preproc] -> EASYOCR
5,bilateral,0.736575,0.506929,0.627617,0.46654,0.716548,0.424714,0.283452,0.424714,OpenCV[todos preproc] -> Tesseract
6,image_full_gray,0.722407,0.501776,0.631631,0.469951,0.707943,0.431654,0.292057,0.431654,OpenCV[todos preproc] -> Tesseract
7,limiarização_binária,0.799637,0.511118,0.661871,0.458235,0.766541,0.393426,0.233459,0.393426,OpenCV[todos preproc] -> Tesseract
8,limiarização_de_otsu,0.882727,0.590714,0.754412,0.461798,0.802274,0.374489,0.197726,0.374489,OpenCV[todos preproc] -> Tesseract
9,original,0.722078,0.501806,0.630746,0.469728,0.707774,0.431534,0.292226,0.431534,OpenCV[todos preproc] -> Tesseract


# All Results

## Default

In [None]:
def style_background_gradient(val):
    """Return the CSS background rule for a dataset-name cell.

    Args:
        val: Cell value to style; compared with ``==`` against the known
            dataset names.

    Returns:
        A string of the form ``'background-color: <color>'``. The colour is
        red for 'Identity Card', blue for 'IIIT5K', green for 'SVHN',
        orange for 'SROEI', and white for any other value.
    """
    dataset_colors = (
        ('Identity Card', 'red'),
        ('IIIT5K', 'blue'),
        ('SVHN', 'green'),
        ('SROEI', 'orange'),
    )
    # Scan in declaration order and take the first name equal to `val`;
    # anything unrecognised falls back to white.
    color = next((shade for name, shade in dataset_colors if val == name), 'white')
    return f'background-color: {color}'

In [None]:
# Tag each per-dataset default summary with its dataset name so the origin
# of every row stays visible once the frames are stacked.
for summary, dataset_name in (
    (result_identity_card, 'Identity Card'),
    (result_IIIT5K, 'IIIT5K'),
    (result_SVHN, 'SVHN'),
    (result_SROEI, 'SROEI'),
):
    summary['type'] = dataset_name

result_all_default = pd.concat(
    [result_identity_card, result_IIIT5K, result_SVHN, result_SROEI],
    ignore_index=True,
)

In [None]:
# Write the combined default results to all_results.csv in the working
# directory, without the row index.
result_all_default.to_csv('all_results.csv', index=False)

In [None]:
# Copy the exported CSV into the results folder under /content/drive
# (presumably a mounted Google Drive; the absolute paths are Colab-specific).
%cp -r /content/all_results.csv /content/drive/MyDrive/'Colab Notebooks'/OCR/Pos/results/

In [None]:
# Colour only the 'type' column, one fixed colour per dataset name, using
# style_background_gradient as the per-cell styling function.
# NOTE(review): newer pandas releases deprecate Styler.applymap in favour of
# Styler.map - confirm the installed version before switching.
styled_df  = result_all_default.style.applymap(style_background_gradient, subset=['type'])
styled_df


Unnamed: 0,filter_,wer_mean,wer_std,cer_mean,cer_std,wil_mean,wil_std,wip_mean,wip_std,fluxo,type
0,bilateral,0.756998,0.549599,0.559238,0.445395,0.71352,0.451922,0.28648,0.451922,CRAFT -> OpenCV[todos preproc] -> EASYOCR,Identity Card
1,image_full_gray,0.506849,0.533263,0.296577,0.394953,0.488237,0.499004,0.511763,0.499004,CRAFT -> OpenCV[todos preproc] -> EASYOCR,Identity Card
2,limiarização_binária,0.784395,0.505011,0.59078,0.454271,0.749256,0.433227,0.250744,0.433227,CRAFT -> OpenCV[todos preproc] -> EASYOCR,Identity Card
3,limiarização_de_otsu,0.834127,0.472141,0.684366,0.448957,0.804348,0.396257,0.195652,0.396257,CRAFT -> OpenCV[todos preproc] -> EASYOCR,Identity Card
4,original,0.488088,0.534429,0.282475,0.387253,0.469029,0.498442,0.530971,0.498442,CRAFT -> OpenCV[todos preproc] -> EASYOCR,Identity Card
5,bilateral,0.89994,0.33221,0.859554,0.337986,0.891007,0.311724,0.108993,0.311724,CRAFT -> OpenCV[todos preproc] -> Tesseract,Identity Card
6,image_full_gray,0.847826,0.411667,0.75488,0.415486,0.827923,0.37508,0.172077,0.37508,CRAFT -> OpenCV[todos preproc] -> Tesseract,Identity Card
7,limiarização_binária,0.817749,0.47852,0.675171,0.459704,0.782807,0.410325,0.217193,0.410325,CRAFT -> OpenCV[todos preproc] -> Tesseract,Identity Card
8,limiarização_de_otsu,0.993746,0.494417,0.884777,0.341119,0.907336,0.289112,0.092664,0.289112,CRAFT -> OpenCV[todos preproc] -> Tesseract,Identity Card
9,original,0.84187,0.430695,0.742225,0.421993,0.818989,0.383014,0.181011,0.383014,CRAFT -> OpenCV[todos preproc] -> Tesseract,Identity Card


In [None]:
# Heat-map view of the combined default results (viridis colour map).
result_all_default.style.background_gradient(cmap='viridis')

Unnamed: 0,filter_,wer_mean,wer_std,cer_mean,cer_std,wil_mean,wil_std,wip_mean,wip_std,fluxo,type
0,bilateral,0.756998,0.549599,0.559238,0.445395,0.71352,0.451922,0.28648,0.451922,CRAFT -> OpenCV[todos preproc] -> EASYOCR,Identity Card
1,image_full_gray,0.506849,0.533263,0.296577,0.394953,0.488237,0.499004,0.511763,0.499004,CRAFT -> OpenCV[todos preproc] -> EASYOCR,Identity Card
2,limiarização_binária,0.784395,0.505011,0.59078,0.454271,0.749256,0.433227,0.250744,0.433227,CRAFT -> OpenCV[todos preproc] -> EASYOCR,Identity Card
3,limiarização_de_otsu,0.834127,0.472141,0.684366,0.448957,0.804348,0.396257,0.195652,0.396257,CRAFT -> OpenCV[todos preproc] -> EASYOCR,Identity Card
4,original,0.488088,0.534429,0.282475,0.387253,0.469029,0.498442,0.530971,0.498442,CRAFT -> OpenCV[todos preproc] -> EASYOCR,Identity Card
5,bilateral,0.89994,0.33221,0.859554,0.337986,0.891007,0.311724,0.108993,0.311724,CRAFT -> OpenCV[todos preproc] -> Tesseract,Identity Card
6,image_full_gray,0.847826,0.411667,0.75488,0.415486,0.827923,0.37508,0.172077,0.37508,CRAFT -> OpenCV[todos preproc] -> Tesseract,Identity Card
7,limiarização_binária,0.817749,0.47852,0.675171,0.459704,0.782807,0.410325,0.217193,0.410325,CRAFT -> OpenCV[todos preproc] -> Tesseract,Identity Card
8,limiarização_de_otsu,0.993746,0.494417,0.884777,0.341119,0.907336,0.289112,0.092664,0.289112,CRAFT -> OpenCV[todos preproc] -> Tesseract,Identity Card
9,original,0.84187,0.430695,0.742225,0.421993,0.818989,0.383014,0.181011,0.383014,CRAFT -> OpenCV[todos preproc] -> Tesseract,Identity Card


## Texts in lowercase

In [None]:
# Tag each lowercase summary with its dataset name, then stack them.
# SVHN has no lowercase variant among the frames combined here.
for summary, dataset_name in (
    (result_identity_card_lowercase, 'Identity Card'),
    (result_IIIT5K_lowercase, 'IIIT5K'),
    (result_filtered_SROEI_lowercase, 'SROEI'),
):
    summary['type'] = dataset_name

result_lowercase = pd.concat(
    [result_identity_card_lowercase, result_IIIT5K_lowercase, result_filtered_SROEI_lowercase],
    ignore_index=True,
)

In [None]:
# Colour only the 'type' column of the lowercase results, one fixed colour
# per dataset name, using style_background_gradient per cell.
styled_df  = result_lowercase.style.applymap(style_background_gradient, subset=['type'])
styled_df


Unnamed: 0,filter_,wer_lowercase_mean,wer_lowercase_std,cer_lowercase_mean,cer_lowercase_std,wil_lowercase_mean,wil_lowercase_std,wip_lowercase_mean,wip_lowercase_std,fluxo,type
0,bilateral,0.718117,0.566228,0.490908,0.446716,0.674613,0.468341,0.325387,0.468341,CRAFT -> OpenCV[todos preproc] -> EASYOCR,Identity Card
1,image_full_gray,0.455304,0.531449,0.227296,0.346151,0.436681,0.495107,0.563319,0.495107,CRAFT -> OpenCV[todos preproc] -> EASYOCR,Identity Card
2,limiarização_binária,0.760429,0.517835,0.548457,0.449113,0.725268,0.446178,0.274732,0.446178,CRAFT -> OpenCV[todos preproc] -> EASYOCR,Identity Card
3,limiarização_de_otsu,0.810191,0.485813,0.638639,0.44817,0.780989,0.412797,0.219011,0.412797,CRAFT -> OpenCV[todos preproc] -> EASYOCR,Identity Card
4,original,0.437128,0.529745,0.2039,0.322613,0.418355,0.492529,0.581645,0.492529,CRAFT -> OpenCV[todos preproc] -> EASYOCR,Identity Card
5,bilateral,0.899285,0.333016,0.857452,0.338778,0.890346,0.312552,0.109654,0.312552,CRAFT -> OpenCV[todos preproc] -> Tesseract,Identity Card
6,image_full_gray,0.845352,0.413775,0.749576,0.416442,0.825437,0.377239,0.174563,0.377239,CRAFT -> OpenCV[todos preproc] -> Tesseract,Identity Card
7,limiarização_binária,0.810489,0.483315,0.651867,0.452945,0.775526,0.415251,0.224474,0.415251,CRAFT -> OpenCV[todos preproc] -> Tesseract,Identity Card
8,limiarização_de_otsu,0.992551,0.495752,0.876383,0.338899,0.906089,0.290861,0.093911,0.290861,CRAFT -> OpenCV[todos preproc] -> Tesseract,Identity Card
9,original,0.839392,0.432688,0.735725,0.422179,0.816498,0.385075,0.183502,0.385075,CRAFT -> OpenCV[todos preproc] -> Tesseract,Identity Card


In [None]:
# Heat-map view of the combined lowercase results. This section previously
# rendered result_all_default, repeating the default-run table instead of
# showing the lowercase metrics.
result_lowercase.style.background_gradient(cmap='viridis')

Unnamed: 0,filter_,wer_mean,wer_std,cer_mean,cer_std,wil_mean,wil_std,wip_mean,wip_std,fluxo,type
0,bilateral,0.756998,0.549599,0.559238,0.445395,0.71352,0.451922,0.28648,0.451922,CRAFT -> OpenCV[todos preproc] -> EASYOCR,Identity Card
1,image_full_gray,0.506849,0.533263,0.296577,0.394953,0.488237,0.499004,0.511763,0.499004,CRAFT -> OpenCV[todos preproc] -> EASYOCR,Identity Card
2,limiarização_binária,0.784395,0.505011,0.59078,0.454271,0.749256,0.433227,0.250744,0.433227,CRAFT -> OpenCV[todos preproc] -> EASYOCR,Identity Card
3,limiarização_de_otsu,0.834127,0.472141,0.684366,0.448957,0.804348,0.396257,0.195652,0.396257,CRAFT -> OpenCV[todos preproc] -> EASYOCR,Identity Card
4,original,0.488088,0.534429,0.282475,0.387253,0.469029,0.498442,0.530971,0.498442,CRAFT -> OpenCV[todos preproc] -> EASYOCR,Identity Card
5,bilateral,0.89994,0.33221,0.859554,0.337986,0.891007,0.311724,0.108993,0.311724,CRAFT -> OpenCV[todos preproc] -> Tesseract,Identity Card
6,image_full_gray,0.847826,0.411667,0.75488,0.415486,0.827923,0.37508,0.172077,0.37508,CRAFT -> OpenCV[todos preproc] -> Tesseract,Identity Card
7,limiarização_binária,0.817749,0.47852,0.675171,0.459704,0.782807,0.410325,0.217193,0.410325,CRAFT -> OpenCV[todos preproc] -> Tesseract,Identity Card
8,limiarização_de_otsu,0.993746,0.494417,0.884777,0.341119,0.907336,0.289112,0.092664,0.289112,CRAFT -> OpenCV[todos preproc] -> Tesseract,Identity Card
9,original,0.84187,0.430695,0.742225,0.421993,0.818989,0.383014,0.181011,0.383014,CRAFT -> OpenCV[todos preproc] -> Tesseract,Identity Card


## Texts in lowercase and removed special characters and punctuations

In [None]:
# Tag each "lowercase + no special characters" summary with its dataset
# name, then stack them. SVHN has no such variant among the frames combined here.
for summary, dataset_name in (
    (result_identity_card_lowercase_no_special, 'Identity Card'),
    (result_IIIT5K_lowercase_no_special, 'IIIT5K'),
    (result_filtered_SROEI_lowercase_no_special, 'SROEI'),
):
    summary['type'] = dataset_name

result_no_special = pd.concat(
    [
        result_identity_card_lowercase_no_special,
        result_IIIT5K_lowercase_no_special,
        result_filtered_SROEI_lowercase_no_special,
    ],
    ignore_index=True,
)

In [None]:
# Colour only the 'type' column of the no-special-characters results, one
# fixed colour per dataset name, using style_background_gradient per cell.
styled_df  = result_no_special.style.applymap(style_background_gradient, subset=['type'])
styled_df


Unnamed: 0,filter_,wer_no_schar_mean,wer_no_schar_std,cer_no_schar_mean,cer_no_schar_std,wil_no_schar_mean,wil_no_schar_std,wip_no_schar_mean,wip_no_schar_std,fluxo,type
0,bilateral,0.670644,0.557283,0.480576,0.453708,0.637828,0.480461,0.362172,0.480461,CRAFT -> OpenCV[todos preproc] -> EASYOCR,Identity Card
1,image_full_gray,0.387232,0.503828,0.20728,0.34364,0.378431,0.484563,0.621569,0.484563,CRAFT -> OpenCV[todos preproc] -> EASYOCR,Identity Card
2,limiarização_binária,0.707637,0.509458,0.52354,0.448943,0.686158,0.464192,0.313842,0.464192,CRAFT -> OpenCV[todos preproc] -> EASYOCR,Identity Card
3,limiarização_de_otsu,0.765215,0.484914,0.618281,0.450229,0.745823,0.435184,0.254177,0.435184,CRAFT -> OpenCV[todos preproc] -> EASYOCR,Identity Card
4,original,0.36963,0.497315,0.182213,0.316255,0.362172,0.480461,0.637828,0.480461,CRAFT -> OpenCV[todos preproc] -> EASYOCR,Identity Card
5,bilateral,0.882458,0.35056,0.853875,0.347647,0.874105,0.33183,0.125895,0.33183,CRAFT -> OpenCV[todos preproc] -> Tesseract,Identity Card
6,image_full_gray,0.800716,0.436362,0.736545,0.429369,0.787739,0.40798,0.212261,0.40798,CRAFT -> OpenCV[todos preproc] -> Tesseract,Identity Card
7,limiarização_binária,0.757757,0.498145,0.635224,0.465413,0.731504,0.442636,0.268496,0.442636,CRAFT -> OpenCV[todos preproc] -> Tesseract,Identity Card
8,limiarização_de_otsu,0.978819,0.501043,0.869481,0.341162,0.89464,0.30671,0.10536,0.30671,CRAFT -> OpenCV[todos preproc] -> Tesseract,Identity Card
9,original,0.785203,0.447996,0.719201,0.437283,0.771629,0.418885,0.228371,0.418885,CRAFT -> OpenCV[todos preproc] -> Tesseract,Identity Card


In [None]:
# Heat-map view of the combined "lowercase + no special characters" results.
# This section previously rendered result_all_default, repeating the
# default-run table instead of showing the '_no_schar' metrics.
result_no_special.style.background_gradient(cmap='viridis')

Unnamed: 0,filter_,wer_mean,wer_std,cer_mean,cer_std,wil_mean,wil_std,wip_mean,wip_std,fluxo,type
0,bilateral,0.756998,0.549599,0.559238,0.445395,0.71352,0.451922,0.28648,0.451922,CRAFT -> OpenCV[todos preproc] -> EASYOCR,Identity Card
1,image_full_gray,0.506849,0.533263,0.296577,0.394953,0.488237,0.499004,0.511763,0.499004,CRAFT -> OpenCV[todos preproc] -> EASYOCR,Identity Card
2,limiarização_binária,0.784395,0.505011,0.59078,0.454271,0.749256,0.433227,0.250744,0.433227,CRAFT -> OpenCV[todos preproc] -> EASYOCR,Identity Card
3,limiarização_de_otsu,0.834127,0.472141,0.684366,0.448957,0.804348,0.396257,0.195652,0.396257,CRAFT -> OpenCV[todos preproc] -> EASYOCR,Identity Card
4,original,0.488088,0.534429,0.282475,0.387253,0.469029,0.498442,0.530971,0.498442,CRAFT -> OpenCV[todos preproc] -> EASYOCR,Identity Card
5,bilateral,0.89994,0.33221,0.859554,0.337986,0.891007,0.311724,0.108993,0.311724,CRAFT -> OpenCV[todos preproc] -> Tesseract,Identity Card
6,image_full_gray,0.847826,0.411667,0.75488,0.415486,0.827923,0.37508,0.172077,0.37508,CRAFT -> OpenCV[todos preproc] -> Tesseract,Identity Card
7,limiarização_binária,0.817749,0.47852,0.675171,0.459704,0.782807,0.410325,0.217193,0.410325,CRAFT -> OpenCV[todos preproc] -> Tesseract,Identity Card
8,limiarização_de_otsu,0.993746,0.494417,0.884777,0.341119,0.907336,0.289112,0.092664,0.289112,CRAFT -> OpenCV[todos preproc] -> Tesseract,Identity Card
9,original,0.84187,0.430695,0.742225,0.421993,0.818989,0.383014,0.181011,0.383014,CRAFT -> OpenCV[todos preproc] -> Tesseract,Identity Card
