# Evaluate the model

#### Import libraries

In [2]:
import os
import tarfile
import itertools
import subprocess
import numpy as np
import pandas as pd
from utilities import *
from generate_output_text import process_images

#### Install ocreval tool

##### Install for macOS

In [5]:
!git clone https://github.com/eddieantonio/ocreval.git

fatal: destination path 'ocreval' already exists and is not an empty directory.


#### Create folders

In [None]:
# Create the working directory layout used by the rest of the notebook.
# os.makedirs(..., exist_ok=True) keeps the cell re-runnable (plain `mkdir`
# errors out when a folder already exists) and does not depend on a shell.
# './benchmarks' itself is created implicitly as the parent of the sub-folders.
for directory in (
    './our_models',
    './benchmarks/csv_benchmarks',
    './benchmarks/zipped_benchmarks',
    './benchmarks/output_benchmarks',
):
    os.makedirs(directory, exist_ok=True)

#### Download Hegghammer's benchmarks

In [None]:
# Download the ground truth. Left disabled here; enable it on a fresh setup
# (the unzip step later renames an extracted './benchmarks/ground_truth' folder).
# download_benchmark('https://zenodo.org/records/5068735/files/ground_truth.tar.lzma?download=1', './benchmarks/zipped_benchmarks')

# Archive names of the Yarmouk benchmarks in Zenodo record 5068735.
benchmarks_names = ['yarmouk_01_col.tar.lzma', 'yarmouk_02_bin.tar.lzma', 'yarmouk_03_col_blur.tar.lzma',
                    'yarmouk_04_col_weak.tar.lzma', 'yarmouk_05_col_snp.tar.lzma', 'yarmouk_06_col_wm.tar.lzma',
                    'yarmouk_07_col_scrib.tar.lzma', 'yarmouk_08_col_ink.tar.lzma', 'yarmouk_09_bin_blur.tar.lzma',
                    'yarmouk_10_bin_weak.tar.lzma', 'yarmouk_11_bin_snp.tar.lzma', 'yarmouk_12_bin_wm.tar.lzma',
                    'yarmouk_13_bin_scrib.tar.lzma', 'yarmouk_14_bin_ink.tar.lzma', 'yarmouk_15_col_blur_weak.tar.lzma',
                    'yarmouk_16_col_blur_snp.tar.lzma', 'yarmouk_17_col_blur_wm.tar.lzma', 'yarmouk_18_col_blur_scrib.tar.lzma',
                    'yarmouk_19_col_blur_ink.tar.lzma', 'yarmouk_20_col_weak_snp.tar.lzma', 'yarmouk_21_col_weak_wm.tar.lzma',
                    'yarmouk_22_col_weak_scrib.tar.lzma', 'yarmouk_23_col_weak_ink.tar.lzma', 'yarmouk_24_col_snp_wm.tar.lzma',
                    'yarmouk_25_col_snp_scrib.tar.lzma', 'yarmouk_26_col_snp_ink.tar.lzma', 'yarmouk_27_col_wm_scrib.tar.lzma',
                    'yarmouk_28_col_wm_ink.tar.lzma', 'yarmouk_29_col_scrib_ink.tar.lzma', 'yarmouk_30_bin_blur_weak.tar.lzma',
                    'yarmouk_31_bin_blur_snp.tar.lzma', 'yarmouk_32_bin_blur_wm.tar.lzma', 'yarmouk_33_bin_blur_scrib.tar.lzma',
                    'yarmouk_34_bin_blur_ink.tar.lzma', 'yarmouk_35_bin_weak_snp.tar.lzma', 'yarmouk_36_bin_weak_wm.tar.lzma',
                    'yarmouk_37_bin_weak_scrib.tar.lzma', 'yarmouk_38_bin_weak_ink.tar.lzma', 'yarmouk_39_bin_snp_wm.tar.lzma',
                    'yarmouk_40_bin_snp_scrib.tar.lzma', 'yarmouk_41_bin_snp_ink.tar.lzma', 'yarmouk_42_bin_wm_scrib.tar.lzma',
                    'yarmouk_43_bin_wm_ink.tar.lzma', 'yarmouk_44_bin_scrib_ink.tar.lzma']

# Selection window into `benchmarks_names`.
# start_index: first list position to download (0 downloads from the beginning).
#   NOTE(review): 34 was hard-coded in the loop; presumably the earlier archives
#   were fetched in a previous run -- confirm before running on a fresh setup.
# num_to_download: maximum number of archives to fetch from start_index;
#   None means "all remaining", which is what the loop actually did before
#   (the previous value 3 was never read).
start_index = 34
num_to_download = None

selected_benchmarks = benchmarks_names[start_index:]
if num_to_download is not None:
    selected_benchmarks = selected_benchmarks[:num_to_download]

for benchmark in selected_benchmarks:
    print(benchmark)
    download_benchmark(f'https://zenodo.org/records/5068735/files/{benchmark}?download=1', './benchmarks/zipped_benchmarks')
    

#### Unzip benchmarks

In [None]:
unzip_benchmark_folder_path = './benchmarks'
benchmarks_path = './benchmarks/zipped_benchmarks'


def _move_extracted_folder(src, dst):
    """Rename the extracted folder ``src`` to ``dst``, merging if ``dst`` exists.

    A plain ``mv src dst`` nests ``src`` inside ``dst`` when ``dst`` is already a
    directory (e.g. when more archives are extracted after a first run). Here the
    children of ``src`` are moved into ``dst`` instead; children that already
    exist in ``dst`` are left untouched and reported.
    """
    if not os.path.isdir(src):
        return  # nothing was extracted under this name
    if not os.path.exists(dst):
        os.rename(src, dst)
        return
    for entry in sorted(os.listdir(src)):
        target = os.path.join(dst, entry)
        if os.path.exists(target):
            print(f'skipped (already present): {target}')
        else:
            os.rename(os.path.join(src, entry), target)
    if not os.listdir(src):
        os.rmdir(src)  # remove the now-empty extraction folder


for benchmark in sorted(os.listdir(benchmarks_path)):
    benchmark_path = os.path.join(benchmarks_path, benchmark)
    # Ignore anything that is not a readable tar archive (e.g. '.DS_Store'),
    # which would otherwise abort the whole loop with a ReadError.
    if not os.path.isfile(benchmark_path) or not tarfile.is_tarfile(benchmark_path):
        print(f'skipped (not a tar archive): {benchmark}')
        continue
    print(benchmark)
    # NOTE(review): the archives come from a fixed Zenodo record; if untrusted
    # archives are ever extracted here, pass an extraction filter to extractall.
    with tarfile.open(benchmark_path, mode='r:xz') as z:
        z.extractall(path=unzip_benchmark_folder_path)

# rename yarmouk to yarmouk_benchmarks
_move_extracted_folder('./benchmarks/yarmouk', './benchmarks/yarmouk_benchmarks')
# rename ground_truth to benchmark_ground_truth
_move_extracted_folder('./benchmarks/ground_truth', './benchmarks/benchmark_ground_truth')

### Find best value for OEM and PSM

#### Generate combinations of OEM and PSM values

In [None]:
# Output folder for the OEM/PSM combination runs. exist_ok=True keeps the cell
# re-runnable; plain `mkdir` fails when the folder is already there.
os.makedirs('./benchmarks/benchmarks_combinations', exist_ok=True)

In [None]:
# Candidate Tesseract settings: every (oem, psm) pair is tried on every
# benchmark folder found under ./benchmarks/yarmouk_benchmarks/.
oem_values = ['3']
psm_values = ['3', '4', '6', '11']
benchmarks_names = os.listdir('./benchmarks/yarmouk_benchmarks/')

# Cartesian product written out explicitly; the element order matches
# itertools.product(oem_values, psm_values, benchmarks_names).
combination_lst = [
    (oem, psm, name)
    for oem in oem_values
    for psm in psm_values
    for name in benchmarks_names
]

# The values are strings, so sorting is lexicographic ('11' sorts before '3').
print(sorted(combination_lst))
print(len(combination_lst))

[('3', '11', '01_col'), ('3', '11', '02_bin'), ('3', '11', '03_col_blur'), ('3', '11', '04_col_weak'), ('3', '11', '05_col_snp'), ('3', '11', '06_col_wm'), ('3', '11', '07_col_scrib'), ('3', '11', '08_col_ink'), ('3', '11', '09_bin_blur'), ('3', '11', '10_bin_weak'), ('3', '11', '11_bin_snp'), ('3', '11', '12_bin_wm'), ('3', '11', '13_bin_scrib'), ('3', '11', '14_bin_ink'), ('3', '11', '15_col_blur_weak'), ('3', '11', '16_col_blur_snp'), ('3', '11', '17_col_blur_wm'), ('3', '11', '18_col_blur_scrib'), ('3', '11', '19_col_blur_ink'), ('3', '11', '20_col_weak_snp'), ('3', '11', '21_col_weak_wm'), ('3', '11', '22_col_weak_scrib'), ('3', '11', '23_col_weak_ink'), ('3', '11', '24_col_snp_wm'), ('3', '11', '25_col_snp_scrib'), ('3', '11', '26_col_snp_ink'), ('3', '11', '27_col_wm_scrib'), ('3', '11', '28_col_wm_ink'), ('3', '11', '29_col_scrib_ink'), ('3', '11', '30_bin_blur_weak'), ('3', '11', '31_bin_blur_snp'), ('3', '11', '32_bin_blur_wm'), ('3', '11', '33_bin_blur_scrib'), ('3', '11', '

#### Generate output text files
The output text files are in ./benchmarks/benchmarks_combinations

In [None]:
# Print the position, in the sorted combination list, of the first entry with
# psm '6' and folder '29_col_scrib_ink'. Nothing is printed if there is none.
target_psm, target_folder = '6', '29_col_scrib_ink'
for idx, (oem, psm, folder) in enumerate(sorted(combination_lst)):
    if (psm, folder) != (target_psm, target_folder):
        continue
    print(idx)
    break

160
[('3', '6', '29_col_scrib_ink'), ('3', '6', '30_bin_blur_weak'), ('3', '6', '31_bin_blur_snp'), ('3', '6', '32_bin_blur_wm'), ('3', '6', '33_bin_blur_scrib'), ('3', '6', '34_bin_blur_ink'), ('3', '6', '35_bin_weak_snp'), ('3', '6', '36_bin_weak_wm'), ('3', '6', '37_bin_weak_scrib'), ('3', '6', '38_bin_weak_ink'), ('3', '6', '39_bin_snp_wm'), ('3', '6', '40_bin_snp_scrib'), ('3', '6', '41_bin_snp_ink'), ('3', '6', '42_bin_wm_scrib'), ('3', '6', '43_bin_wm_ink'), ('3', '6', '44_bin_scrib_ink')]


In [None]:
benchmarks_folder_path = './benchmarks/yarmouk_benchmarks/'
benchmark_output_path = './benchmarks/benchmarks_combinations/'

# Folders whose name contains this marker are not processed.
# NOTE(review): the reason for excluding the 'snp' benchmarks is not stated
# anywhere in this notebook -- confirm.
skip_snp = 'snp'

# Position in the sorted combination list to start from (0 = run everything).
# 160 is the index printed by the lookup cell for ('3', '6', '29_col_scrib_ink').
resume_from = 160

for oem, psm, folder in sorted(combination_lst)[resume_from:]:
    if skip_snp in folder:
        continue

    print(folder)
    print(f'oem: {oem} - psm: {psm}')
    # Keyword is `oem_number`, matching the other process_images calls in this
    # notebook (this call previously used the misspelled `oem_nubmer`).
    process_images(folder_path=benchmarks_folder_path + folder, output_folder_path=benchmark_output_path,
                   model='ara_best', is_colab=True, is_yarmouk=True, is_combination=True,
                   oem_number=oem, psm_number=psm)

29_col_scrib_ink
oem: 3 - psm: 6
./benchmarks/yarmouk_benchmarks/29_col_scrib_ink/26857_1.tiff ./benchmarks/benchmarks_combinations/yarmouk_29_col_scrib_ink_output_3_6/26857_1.output
Processed: ./benchmarks/yarmouk_benchmarks/29_col_scrib_ink/26857_1.tiff (Processing Time: 3.76 seconds)
./benchmarks/yarmouk_benchmarks/29_col_scrib_ink/6327_1.tiff ./benchmarks/benchmarks_combinations/yarmouk_29_col_scrib_ink_output_3_6/6327_1.output
Processed: ./benchmarks/yarmouk_benchmarks/29_col_scrib_ink/6327_1.tiff (Processing Time: 3.11 seconds)
./benchmarks/yarmouk_benchmarks/29_col_scrib_ink/27927_1.tiff ./benchmarks/benchmarks_combinations/yarmouk_29_col_scrib_ink_output_3_6/27927_1.output
Processed: ./benchmarks/yarmouk_benchmarks/29_col_scrib_ink/27927_1.tiff (Processing Time: 2.18 seconds)
./benchmarks/yarmouk_benchmarks/29_col_scrib_ink/25223_1.tiff ./benchmarks/benchmarks_combinations/yarmouk_29_col_scrib_ink_output_3_6/25223_1.output
Processed: ./benchmarks/yarmouk_benchmarks/29_col_scrib

#### Use ISRI tool to evaluate all combinations

In [None]:
# Score every OEM/PSM combination output against the ground truth and hand the
# per-file results to create_df under the name `csv_name`.
csv_name = 'ara_best_combination'
gt_path = './benchmarks/benchmark_ground_truth/yarmouk_gt'
output_benchmarks_path = './benchmarks/benchmarks_combinations'

dataset_name_lst, file_name_lst, engine_name_lst, char_acc_lst, word_acc_lst, oem_lst, psm_lst = get_accuracy(ground_truth_path=gt_path,
                                                                                                              output_benchmarks_path=output_benchmarks_path,
                                                                                                              model_name='our_ara', is_combination=True)

# Sanity check: all seven result lists should have the same length.
# (The last value used to be len(oem_lst) a second time, so psm_lst was never checked.)
print(len(dataset_name_lst), len(file_name_lst), len(engine_name_lst), len(char_acc_lst), len(word_acc_lst), len(oem_lst), len(psm_lst))
create_df(csv_name=csv_name, dataset_name_lst=dataset_name_lst,
          file_name_lst=file_name_lst, engine_name_lst=engine_name_lst,
          char_acc_lst=char_acc_lst, word_acc_lst=word_acc_lst, oem_lst=oem_lst, psm_lst=psm_lst)

#### Find the mean of each benchmark

In [None]:
# Load the per-file results of the combination run.
df = pd.read_csv(f'benchmarks/csv_benchmarks/{csv_name}.csv')

# Accuracies are stored as text with a trailing '%'; strip it and convert to float.
for accuracy_column in ('char_acc', 'word_acc'):
    df[accuracy_column] = df[accuracy_column].str.replace('%', '').astype(float)

# oem/psm are compared against strings parsed from folder names later on,
# so keep them as strings.
for setting_column in ('oem', 'psm'):
    df[setting_column] = df[setting_column].astype(str)

df

Unnamed: 0,dataset,file,engine,oem,psm,char_acc,word_acc
0,yarmouk_28_col_wm_ink_3_4,7913.txt,our_ara,3,4,75.90,83.06
1,yarmouk_28_col_wm_ink_3_4,4347.txt,our_ara,3,4,40.16,46.69
2,yarmouk_28_col_wm_ink_3_4,11464.txt,our_ara,3,4,66.94,80.51
3,yarmouk_28_col_wm_ink_3_4,13756.txt,our_ara,3,4,57.64,66.82
4,yarmouk_28_col_wm_ink_3_4,20181.txt,our_ara,3,4,41.71,42.31
...,...,...,...,...,...,...,...
12795,yarmouk_19_col_blur_ink_3_3,11761.txt,our_ara,3,3,0.00,0.00
12796,yarmouk_19_col_blur_ink_3_3,10023.txt,our_ara,3,3,18.74,3.66
12797,yarmouk_19_col_blur_ink_3_3,3787.txt,our_ara,3,3,0.47,0.00
12798,yarmouk_19_col_blur_ink_3_3,6066.txt,our_ara,3,3,19.71,2.60


In [None]:
# Show row count, non-null counts and dtypes of the per-file results table.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12800 entries, 0 to 12799
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   dataset   12800 non-null  object 
 1   file      12800 non-null  object 
 2   engine    12800 non-null  object 
 3   oem       12800 non-null  object 
 4   psm       12800 non-null  object 
 5   char_acc  12800 non-null  float64
 6   word_acc  12800 non-null  float64
dtypes: float64(2), object(5)
memory usage: 700.1+ KB


In [None]:
# Every folder in benchmarks_combinations is one (benchmark, oem, psm) run.
benchmarks_names = os.listdir('./benchmarks/benchmarks_combinations')

# The engine label is taken from the first row and reused for every entry.
engine_name = df['engine'].iloc[0]

# Maps '<dataset>_<oem>_<psm>' -> (engine, oem, psm, mean char_acc, mean word_acc).
mean_values = {}
for folder_name in benchmarks_names:
    folder_name_final = folder_name.replace('_output', '')
    splitted_folder_name = folder_name_final.split('_')
    if len(splitted_folder_name) < 2:
        # Not a '<dataset>_<oem>_<psm>' name; indexing [-2] would raise IndexError.
        continue
    oem_value, psm_value = splitted_folder_name[-2:]

    # Select the rows of this run once and reuse the selection for both means.
    run_rows = df[(df['dataset'] == folder_name_final) & (df['oem'] == oem_value) & (df['psm'] == psm_value)]
    mean_values[folder_name_final] = (engine_name, oem_value, psm_value,
                                      np.mean(run_rows['char_acc']),
                                      np.mean(run_rows['word_acc']),)

mean_values

{'yarmouk_28_col_wm_ink_3_4': ('our_ara',
  '3',
  '4',
  53.07650000000002,
  57.84870000000001),
 'yarmouk_07_col_scrib_3_6': ('our_ara', '3', '6', 68.0559, 83.5535),
 'yarmouk_27_col_wm_scrib_3_6': ('our_ara', '3', '6', 69.5408, 82.7758),
 'yarmouk_12_bin_wm_3_6': ('our_ara', '3', '6', 73.5084, 88.6463),
 'yarmouk_27_col_wm_scrib_3_4': ('our_ara',
  '3',
  '4',
  63.494699999999995,
  70.60229999999999),
 'yarmouk_03_col_blur_3_11': ('our_ara',
  '3',
  '11',
  69.23460000000001,
  80.99119999999999),
 'yarmouk_38_bin_weak_ink_3_3': ('our_ara', '3', '3', 60.3587, 71.9808),
 'yarmouk_33_bin_blur_scrib_3_11': ('our_ara',
  '3',
  '11',
  64.9431,
  76.62660000000001),
 'yarmouk_44_bin_scrib_ink_3_11': ('our_ara',
  '3',
  '11',
  55.28789999999999,
  63.147000000000006),
 'yarmouk_21_col_weak_wm_3_6': ('our_ara',
  '3',
  '6',
  74.8985,
  89.54809999999999),
 'yarmouk_29_col_scrib_ink_3_3': ('our_ara', '3', '3', 48.37, 52.3266),
 'yarmouk_32_bin_blur_wm_3_3': ('our_ara',
  '3',
  '3'

#### Save the mean of each benchmark into a new dataframe

In [None]:
# Flatten mean_values ({dataset: (engine, oem, psm, char_mean, word_mean)})
# into one row per dataset.
mean_columns = ['dataset', 'engine', 'oem', 'psm', 'char_acc_mean', 'word_acc_mean']
mean_rows = [
    (dataset, stats[0], stats[1], stats[2], stats[3], stats[4])
    for dataset, stats in mean_values.items()
]

df_mean = pd.DataFrame(mean_rows, columns=mean_columns)
# Drop rows with a missing value (a mean is NaN when no result rows matched),
# then order by dataset name, descending. Use ascending=True for the reverse order.
df_mean = df_mean.dropna()
df_mean = df_mean.sort_values('dataset', ascending=False)

In [None]:
# Show row count, non-null counts and dtypes of the per-benchmark mean table.
df_mean.info()

<class 'pandas.core.frame.DataFrame'>
Index: 128 entries, 54 to 83
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   dataset        128 non-null    object 
 1   engine         128 non-null    object 
 2   oem            128 non-null    object 
 3   psm            128 non-null    object 
 4   char_acc_mean  128 non-null    float64
 5   word_acc_mean  128 non-null    float64
dtypes: float64(2), object(4)
memory usage: 7.0+ KB


In [None]:
# Replace the mean word accuracy by its complement (100 - accuracy).
# The column keeps the name 'word_acc_mean', and the operation is not
# idempotent: running this cell a second time flips the values back.
word_acc_complement = 100 - df_mean['word_acc_mean']
df_mean['word_acc_mean'] = word_acc_complement
df_mean

Unnamed: 0,dataset,engine,oem,psm,char_acc_mean,word_acc_mean
54,yarmouk_44_bin_scrib_ink_3_6,our_ara,3,6,58.1821,34.5879
75,yarmouk_44_bin_scrib_ink_3_4,our_ara,3,4,49.3226,45.7493
45,yarmouk_44_bin_scrib_ink_3_3,our_ara,3,3,49.4591,45.0417
8,yarmouk_44_bin_scrib_ink_3_11,our_ara,3,11,55.2879,36.8530
114,yarmouk_43_bin_wm_ink_3_6,our_ara,3,6,60.6991,32.2702
...,...,...,...,...,...,...
117,yarmouk_02_bin_3_11,our_ara,3,11,72.5531,20.7445
85,yarmouk_01_col_3_6,our_ara,3,6,71.5345,11.9366
102,yarmouk_01_col_3_4,our_ara,3,4,71.8901,13.8263
108,yarmouk_01_col_3_3,our_ara,3,3,71.9268,14.2327


In [None]:
# test: count how many rows of a saved mean table use each psm value.
# NOTE(review): this file name is not the '{csv_name}_mean.csv' written by the
# save cell of this notebook; presumably it was produced elsewhere -- confirm.
df_mean_final = pd.read_csv('./benchmarks/csv_benchmarks/ara_best_combination_combinations_mean.csv')

psm_counts = df_mean_final['psm'].value_counts()
psm_counts

psm
6    30
3     2
Name: count, dtype: int64

In [None]:
# top_5_values_per_name = df_mean.groupby('dataset', group_keys=False)['word_acc_mean'].apply(lambda x: x.nlargest(5))
# top_5_values_per_name.to_csv('./benchmarks/csv_benchmarks/our_ara_combinations_mean_top_5.csv', index=False)

In [None]:
# Persist the per-benchmark means without the index column.
# If the "100 - word_acc_mean" cell was run, the 'word_acc_mean' column written
# here holds the complement of the accuracy, not the accuracy itself.
df_mean.to_csv(f'./benchmarks/csv_benchmarks/{csv_name}_mean.csv', index=False)


### Start evaluating the model

#### Generate output text files
the output text files are in ./benchmarks/output_benchmarks

#### Create a folder for the model output

In [7]:
# Name of the model under evaluation; also used as its output sub-folder name.
model_name = 'ara_fast'
# exist_ok=True makes the explicit "already listed?" check unnecessary, and the
# call also works when ./benchmarks/output_benchmarks does not exist yet.
os.makedirs(f'./benchmarks/output_benchmarks/{model_name}', exist_ok=True)

In [8]:
benchmarks_folder_path = './benchmarks/yarmouk_benchmarks/'
benchmark_output_path = f'./benchmarks/output_benchmarks/{model_name}/'

# Benchmarks processed with PSM 3; every other benchmark uses PSM 6.
# NOTE(review): presumably chosen from the combination results -- confirm.
OEM_PSM_3 = ['06_col_wm', '10_bin_weak']

# Only the first `number_of_benchmarks` folders (in sorted order) are processed.
number_of_benchmarks = 7
selected_folders = sorted(os.listdir(benchmarks_folder_path))[:number_of_benchmarks]

for folder in selected_folders:
    print(folder)
    if 'snp' in folder:
        continue

    # OEM is always '3'; only the page segmentation mode depends on the folder.
    psm_number = '3' if folder in OEM_PSM_3 else '6'
    print(3, psm_number)
    process_images(folder_path=benchmarks_folder_path + folder, output_folder_path=benchmark_output_path,
                   model=model_name, is_colab=True, is_yarmouk=True, oem_number='3', psm_number=psm_number)

01_col
3 6
ara_fast
./benchmarks/yarmouk_benchmarks/01_col/26857_1.tiff ./benchmarks/output_benchmarks/ara_fast/yarmouk_01_col_output/26857_1.output
Processed image 1: ./benchmarks/yarmouk_benchmarks/01_col/26857_1.tiff (Processing Time: 3.70 seconds)
./benchmarks/yarmouk_benchmarks/01_col/6327_1.tiff ./benchmarks/output_benchmarks/ara_fast/yarmouk_01_col_output/6327_1.output
Processed image 2: ./benchmarks/yarmouk_benchmarks/01_col/6327_1.tiff (Processing Time: 2.72 seconds)
./benchmarks/yarmouk_benchmarks/01_col/27927_1.tiff ./benchmarks/output_benchmarks/ara_fast/yarmouk_01_col_output/27927_1.output
Processed image 3: ./benchmarks/yarmouk_benchmarks/01_col/27927_1.tiff (Processing Time: 2.23 seconds)
./benchmarks/yarmouk_benchmarks/01_col/25223_1.tiff ./benchmarks/output_benchmarks/ara_fast/yarmouk_01_col_output/25223_1.output
Processed image 4: ./benchmarks/yarmouk_benchmarks/01_col/25223_1.tiff (Processing Time: 2.78 seconds)
./benchmarks/yarmouk_benchmarks/01_col/32407_1.tiff ./b

#### Use ISRI tool to evaluate the model

In [9]:

# Score the model's output files against the ground truth.
gt_path = './benchmarks/benchmark_ground_truth/yarmouk_gt'
output_benchmarks_path = f'./benchmarks/output_benchmarks/{model_name}'

accuracy_results = get_accuracy(ground_truth_path=gt_path,
                                output_benchmarks_path=output_benchmarks_path,
                                model_name=model_name)
# get_accuracy returns seven lists; the last two (oem/psm) are not needed here.
dataset_name_lst, file_name_lst, engine_name_lst, char_acc_lst, word_acc_lst, _, _ = accuracy_results

# Sanity check: the five lists passed to create_df should have equal length.
print(*(len(lst) for lst in (dataset_name_lst, file_name_lst, engine_name_lst, char_acc_lst, word_acc_lst)))
create_df(csv_name=model_name,
          dataset_name_lst=dataset_name_lst,
          file_name_lst=file_name_lst,
          engine_name_lst=engine_name_lst,
          char_acc_lst=char_acc_lst,
          word_acc_lst=word_acc_lst)

yarmouk_01_col_output
./benchmarks/benchmark_ground_truth/yarmouk_gt/7913.txt ./benchmarks/output_benchmarks/ara_fast/yarmouk_01_col_output/7913_1.output.txt


word_acc index: 25.40%
char_acc index: 37.31%
./benchmarks/benchmark_ground_truth/yarmouk_gt/4347.txt ./benchmarks/output_benchmarks/ara_fast/yarmouk_01_col_output/4347_1.output.txt
word_acc index: 98.01%
char_acc index: 70.18%
./benchmarks/benchmark_ground_truth/yarmouk_gt/11464.txt ./benchmarks/output_benchmarks/ara_fast/yarmouk_01_col_output/11464_1.output.txt
word_acc index: 88.72%
char_acc index: 63.16%
./benchmarks/benchmark_ground_truth/yarmouk_gt/13756.txt ./benchmarks/output_benchmarks/ara_fast/yarmouk_01_col_output/13756_1.output.txt
word_acc index: 92.24%
char_acc index: 68.90%
./benchmarks/benchmark_ground_truth/yarmouk_gt/20181.txt ./benchmarks/output_benchmarks/ara_fast/yarmouk_01_col_output/20181_1.output.txt
word_acc index: 58.04%
char_acc index: 44.27%
./benchmarks/benchmark_ground_truth/yarmouk_gt/9256.txt ./benchmarks/output_benchmarks/ara_fast/yarmouk_01_col_output/9256_1.output.txt
word_acc index: 93.29%
char_acc index: 68.78%
./benchmarks/benchmark_ground_truth/ya