In [19]:
!pip install torchvision matplotlib numpy tensorboard standard-imghdr torchnet scikit-learn opencv-python Levenshtein


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [20]:
# imports
import numpy as np
from PIL import Image

import Levenshtein
import sys
sys.path.append('../core')
from main import process_image

# Evaluation on random words

Loading the dataset

In [21]:
# Root folder of the word-image dataset. dataset.txt is tab-separated; the
# evaluation loop reads column 0 as the image path (relative to this folder)
# and column 1 as the ground-truth label.
words_dir = "../dataset/words_dataset"
words_dataset = np.loadtxt(
    f'{words_dir}/dataset.txt',
    dtype=str,
    delimiter='\t',
    # Empty comment list: labels may contain '#', which must not be
    # interpreted as the start of a comment.
    comments=[],
)

In [22]:
# Sanity check: number of rows loaded from dataset.txt.
len(words_dataset)

1500

In [23]:
# Run the recognizer on every dataset sample and record how far its output is
# from the ground-truth label (Levenshtein edit distance).
# Any exception raised while opening or processing an image propagates and
# stops the evaluation.
results = []
for data in words_dataset:
    # Each row holds the image path (relative to words_dir) and its label.
    img_path = f'{words_dir}/{data[0]}'
    print("Processing: ", img_path)
    print(data[1])
    # The context manager closes the image file as soon as the sample has
    # been processed, so file handles do not pile up across the loop.
    with Image.open(img_path) as img:
        # process_image returns a pair; only its second element (the
        # recognized text) is needed here.
        _, output = process_image(img)
    edit_distance = Levenshtein.distance(data[1], output)

    results.append({
        'path': img_path,
        'label': data[1],
        'output': output,
        'edit_distance': edit_distance
    })
    print(f'{output}\nEDIT DISTANCE: {edit_distance}\n')

Processing:  ../dataset/words_dataset/Lato-Medium/2e50447b593857e282acc3a924_32221.png
.PD{Y8W€é$
. P D{Y8W€é$
EDIT DISTANCE: 2

Processing:  ../dataset/words_dataset/Lato-Medium/76774e634770415c666b_81035.png
vwNcGpA\fk
vw N cG pA\&
EDIT DISTANCE: 5

Processing:  ../dataset/words_dataset/Lato-Medium/577b32352bc3b9762b4437_74553.png
W{25+ùv+D7
W{2 5 + ùV+ D 7
EDIT DISTANCE: 6

Processing:  ../dataset/words_dataset/Lato-Medium/536c2e68504d41446d2c_78218.png
Sl.hPMADm,
Sl. h PMADm,
EDIT DISTANCE: 2

Processing:  ../dataset/words_dataset/Lato-Medium/61753a44245d375d7578_51301.png
au:D$]7]ux
a u: D$]7] ux
EDIT DISTANCE: 3

Processing:  ../dataset/words_dataset/Lato-Medium/4f3b513b704d29213577_91954.png
O;Q;pM)!5w
o;Q; pM)! 5W
EDIT DISTANCE: 4

Processing:  ../dataset/words_dataset/Lato-Medium/34524e47462b34652952_55796.png
4RNGF+4e)R
4 RN G F+4e) R
EDIT DISTANCE: 4

Processing:  ../dataset/words_dataset/Lato-Medium/75455034576f726c6cc3b9_42634.png
uEP4Worllù
u E P4Wo r l l ù
EDIT DISTANCE:

In [24]:
# Summary statistics of the per-sample edit distances.
# Entries whose distance is None are skipped.
edit_distances = [result['edit_distance'] for result in results if result['edit_distance'] is not None]
mean = np.mean(edit_distances)
variance = np.var(edit_distances)
minimum = np.min(edit_distances)
maximum = np.max(edit_distances)

print(f"Mean: {mean}, Variance: {variance}, Min: {minimum}, Max: {maximum}")

Mean: 5.43, Variance: 23.099766666666664, Min: 0, Max: 40


In [25]:
# Gather every sample whose edit distance equals the largest one observed.
max_edit_distance_objects = list(
    filter(lambda entry: entry['edit_distance'] == maximum, results)
)
print(max_edit_distance_objects)

[{'path': '../dataset/words_dataset/Lato-Hairline/756e7a6d752351525275_73001.png', 'label': np.str_('unzmu#QRRu'), 'output': 'g... -%.Z .-~.-~. g...# :Z:&.. Z] Z] g...', 'edit_distance': 40}]


In [26]:
# Look-alike glyphs collapsed onto a single representative character.
# Only lowercase keys are needed: the text is lowercased before the table is
# applied, so an uppercase 'I' has already become 'i' at that point.
_LOOKALIKE_TABLE = str.maketrans({'0': 'o', 'i': 'l', '1': 'l'})


def normalize_simple(text):
    """Canonicalize a string for a lenient comparison.

    The text is lowercased, space characters (' ') are removed, and the
    easily confused glyphs '0' -> 'o' and 'i' / '1' -> 'l' are unified.

    Args:
        text: string to normalize; may be empty or None.

    Returns:
        The normalized string, or "" when ``text`` is empty or None.
    """
    if not text:
        return ""
    return text.lower().replace(" ", "").translate(_LOOKALIKE_TABLE)

def _count_matches(records, canon):
    """Count records whose non-empty output equals the label after ``canon``.

    Args:
        records: iterable of dicts with 'label' and 'output' string entries.
        canon: callable applied to both strings before they are compared.

    Returns:
        Number of matching records. A record with an empty output is never
        counted as correct.
    """
    return sum(
        1 for record in records
        if record['output'] and canon(record['label']) == canon(record['output'])
    )


def _as_percent(count, total):
    """Return count/total as a percentage, or NaN when there are no samples."""
    return count / total * 100 if total else float('nan')


# Number of exact matches under progressively more lenient comparisons.
correct_predictions_case = _count_matches(results, lambda s: s)
correct_predictions = _count_matches(results, lambda s: s.lower())
correct_predictions_no_spaces = _count_matches(results, lambda s: s.replace(" ", "").lower())
correct_predictions_case_no_spaces = _count_matches(results, lambda s: s.replace(" ", ""))
correct_predictions_ignore_simple = _count_matches(results, normalize_simple)

# Accuracy values
total_samples = len(results)
accuracy_case = _as_percent(correct_predictions_case, total_samples)
accuracy = _as_percent(correct_predictions, total_samples)
accuracy_no_spaces = _as_percent(correct_predictions_no_spaces, total_samples)
accuracy_case_no_spaces = _as_percent(correct_predictions_case_no_spaces, total_samples)
accuracy_ignore_simple = _as_percent(correct_predictions_ignore_simple, total_samples)

# Output
print(f"Accuracy (case sensitive): {accuracy_case:.2f}%")
print(f"Accuracy (case insensitive): {accuracy:.2f}%")
print(f"Accuracy (case insensitive, no spaces): {accuracy_no_spaces:.2f}%")
print(f"Accuracy (case sensitive, no spaces): {accuracy_case_no_spaces:.2f}%")
print(f"Accuracy (ignore case, spaces, o=0, i=l=I=1): {accuracy_ignore_simple:.2f}%")

Accuracy (case sensitive): 2.40%
Accuracy (case insensitive): 3.60%
Accuracy (case insensitive, no spaces): 43.80%
Accuracy (case sensitive, no spaces): 32.20%
Accuracy (ignore case, spaces, o=0, i=l=I=1): 48.60%


In [27]:
# List every sample whose (non-empty) prediction differs from its label.
# Samples with an empty output are not included by this filter.
nonzero_edit_distance = list(
    filter(
        lambda entry: entry['output'] and entry['label'] != entry['output'],
        results,
    )
)
for r in nonzero_edit_distance:
    print(r)

{'path': '../dataset/words_dataset/Lato-Medium/2e50447b593857e282acc3a924_32221.png', 'label': np.str_('.PD{Y8W€é$'), 'output': '. P D{Y8W€é$', 'edit_distance': 2}
{'path': '../dataset/words_dataset/Lato-Medium/76774e634770415c666b_81035.png', 'label': np.str_('vwNcGpA\\fk'), 'output': 'vw N cG pA\\&', 'edit_distance': 5}
{'path': '../dataset/words_dataset/Lato-Medium/577b32352bc3b9762b4437_74553.png', 'label': np.str_('W{25+ùv+D7'), 'output': 'W{2 5 + ùV+ D 7', 'edit_distance': 6}
{'path': '../dataset/words_dataset/Lato-Medium/536c2e68504d41446d2c_78218.png', 'label': np.str_('Sl.hPMADm,'), 'output': 'Sl. h PMADm,', 'edit_distance': 2}
{'path': '../dataset/words_dataset/Lato-Medium/61753a44245d375d7578_51301.png', 'label': np.str_('au:D$]7]ux'), 'output': 'a u: D$]7] ux', 'edit_distance': 3}
{'path': '../dataset/words_dataset/Lato-Medium/4f3b513b704d29213577_91954.png', 'label': np.str_('O;Q;pM)!5w'), 'output': 'o;Q; pM)! 5W', 'edit_distance': 4}
{'path': '../dataset/words_dataset/Lat