In [1]:
!pip install torchvision matplotlib numpy tensorboard standard-imghdr torchnet scikit-learn opencv-python Levenshtein


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [7]:
# imports
import matplotlib.pyplot as plt
import numpy as np

import torch
import torchvision
import torchvision.transforms as transforms

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.utils.data.dataset import Dataset
from PIL import Image, ImageDraw, ImageFont
from sklearn.metrics import accuracy_score

from torch.utils.tensorboard import SummaryWriter

from ImageToStringNet import ImageToStringNet, classes
from ImageToStringNetDropout import ImageToStringNetDropout

import Levenshtein
import sys
sys.path.append('../core')
from main import process_image

# Evaluation

In [8]:
# Directory containing the word images and the index file `dataset.txt`.
words_dir = "../dataset/words_dataset"

# Load the index as a table of strings, one row per sample, split on tabs.
# The evaluation loop reads column 0 as the image path (relative to
# `words_dir`) and column 1 as the ground-truth label.
#   * comments=None  - labels may contain '#', so comment stripping must be
#                      disabled (None is the documented way to do that).
#   * encoding       - labels contain non-ASCII characters (e.g. '€', 'é', 'ù',
#                      '£'); decode explicitly as UTF-8 instead of relying on
#                      the platform's default encoding.
#                      NOTE(review): assumes dataset.txt is UTF-8 encoded - confirm.
words_dataset = np.loadtxt(
	f'{words_dir}/dataset.txt',
	dtype=str,
	delimiter='\t',
	comments=None,
	encoding='utf-8',
)

In [9]:
# Display the number of entries loaded into `words_dataset`.
len(words_dataset)

1500

In [10]:
# Run `process_image` on every sample of `words_dataset` and collect one record
# per sample in `results`.
#
# Each record has the keys 'path', 'label', 'output' and 'edit_distance'.
# When processing a sample raises, 'output' and 'edit_distance' are None and
# the exception is stored under the extra key 'error'.
results = []
for data in words_dataset:
	img_path = f'{words_dir}/{data[0]}'
	print("Processing: ", img_path)
	print(data[1])
	try:
		# Open the image inside the try block so that an unreadable or missing
		# file is recorded as a failed sample instead of aborting the whole
		# run, and use a context manager so the file handle is always closed.
		with Image.open(img_path) as img:
			_, output = process_image(img)
		edit_distance = Levenshtein.distance(data[1], output)

		results.append({
			'path': img_path,
			'label': data[1],
			'output': output,
			'edit_distance': edit_distance
		})
		print(f'{output}\nEDIT DISTANCE: {edit_distance}\n')
	except Exception as e:
		# Best-effort evaluation: keep going, but remember what failed and why.
		print(f'Error: {e}')
		results.append({
			'path': img_path,
			'label': data[1],
			'output': None,
			'edit_distance': None,
			'error': e
		})

Processing:  ../dataset/words_dataset/Lato-Medium/2e50447b593857e282acc3a924_32221.png
.PD{Y8W€é$
.PD{Y8W€é$
EDIT DISTANCE: 0

Processing:  ../dataset/words_dataset/Lato-Medium/76774e634770415c666b_81035.png
vwNcGpA\fk
vwNcGpA\)
EDIT DISTANCE: 2

Processing:  ../dataset/words_dataset/Lato-Medium/577b32352bc3b9762b4437_74553.png
W{25+ùv+D7
W{25+ùV+D7
EDIT DISTANCE: 1

Processing:  ../dataset/words_dataset/Lato-Medium/536c2e68504d41446d2c_78218.png
Sl.hPMADm,
Sl.hPMADm,
EDIT DISTANCE: 0

Processing:  ../dataset/words_dataset/Lato-Medium/61753a44245d375d7578_51301.png
au:D$]7]ux
au:D$]7]ux
EDIT DISTANCE: 0

Processing:  ../dataset/words_dataset/Lato-Medium/4f3b513b704d29213577_91954.png
O;Q;pM)!5w
o;Q;pM)!5W
EDIT DISTANCE: 2

Processing:  ../dataset/words_dataset/Lato-Medium/34524e47462b34652952_55796.png
4RNGF+4e)R
4RNGF+4e)R
EDIT DISTANCE: 0

Processing:  ../dataset/words_dataset/Lato-Medium/75455034576f726c6cc3b9_42634.png
uEP4Worllù
uEP4Worllù
EDIT DISTANCE: 0

Processing:  ../dataset

In [11]:
# Summary statistics of the edit distance over the samples that were processed
# successfully (failed samples carry edit_distance=None and are excluded).
edit_distances = [result['edit_distance'] for result in results if result['edit_distance'] is not None]

# Fail with an explicit message instead of an opaque NumPy reduction error
# when there is nothing to aggregate.
if not edit_distances:
	raise ValueError("No successful predictions in `results`: cannot compute edit-distance statistics")

mean = np.mean(edit_distances)
variance = np.var(edit_distances)
minimum = np.min(edit_distances)
maximum = np.max(edit_distances)

print(f"Mean: {mean}, Variance: {variance}, Min: {minimum}, Max: {maximum}")

Mean: 2.268111035883548, Variance: 20.193519316039342, Min: 0, Max: 36


In [12]:
# Keep only the records whose edit distance equals `maximum`, then print them.
max_edit_distance_objects = list(
	filter(lambda entry: entry['edit_distance'] == maximum, results)
)
print(max_edit_distance_objects)

[{'path': '../dataset/words_dataset/Lato-Hairline/756e7a6d752351525275_73001.png', 'label': np.str_('unzmu#QRRu'), 'output': '<... -N.:.-~4-~.<...<:::~..:: ::<...', 'edit_distance': 36}]


In [13]:
# Select the records that carry an 'error' entry and print them one per line.
error_objects = list(filter(lambda entry: 'error' in entry, results))

print("Objects with errors:")
for obj in error_objects:
	print(obj)

Objects with errors:
{'path': '../dataset/words_dataset/Lato-Light/5b613b2f686b6c545d3e_60919.png', 'label': np.str_('[a;/hklT]>'), 'output': None, 'edit_distance': None, 'error': error("OpenCV(4.11.0) /io/opencv/modules/imgproc/src/resize.cpp:4211: error: (-215:Assertion failed) inv_scale_x > 0 in function 'resize'\n")}
{'path': '../dataset/words_dataset/Lato-Light/566c77c2a3316e26423832_97465.png', 'label': np.str_('Vlw£1n&B82'), 'output': None, 'edit_distance': None, 'error': error("OpenCV(4.11.0) /io/opencv/modules/imgproc/src/resize.cpp:4211: error: (-215:Assertion failed) inv_scale_x > 0 in function 'resize'\n")}
{'path': '../dataset/words_dataset/Lato-Light/566c5c732f556250583c_53287.png', 'label': np.str_('Vl\\s/UbPX<'), 'output': None, 'edit_distance': None, 'error': error("OpenCV(4.11.0) /io/opencv/modules/imgproc/src/resize.cpp:4211: error: (-215:Assertion failed) inv_scale_x > 0 in function 'resize'\n")}
{'path': '../dataset/words_dataset/Lato-Light/28453a766c273e2734c3b9_8

In [14]:
# Display how many records carry an 'error' entry.
len(error_objects)

23

In [None]:
# Exact-match accuracy of the predictions under four increasingly lenient
# comparison rules. Every percentage is taken over ALL records in `results`,
# so samples that failed (output is None) count as incorrect.


def _ignore_case(text):
	"""Normalize `text` for a case-insensitive comparison."""
	return text.lower()


def _ignore_case_and_spaces(text):
	"""Normalize `text` ignoring case and any space characters."""
	return text.replace(" ", "").lower()


def _ignore_case_spaces_and_Il(text):
	"""Normalize `text` ignoring case, spaces and the 'I' / 'l' distinction."""
	return text.replace(" ", "").replace("I", "l").lower()


def _count_correct(records, normalize=None):
	"""Count records whose label equals the prediction after normalization.

	Args:
		records: iterable of result dicts with 'label' and 'output' keys.
		normalize: optional callable applied to both strings before comparing;
			None compares the raw strings.

	Returns:
		Number of matching records. Records with a falsy 'output' are never
		counted.
	"""
	if normalize is None:
		return sum(1 for record in records if record['output'] and record['label'] == record['output'])
	return sum(
		1 for record in records
		if record['output'] and normalize(record['label']) == normalize(record['output'])
	)


# Fail with an explicit message instead of a bare ZeroDivisionError.
if len(results) == 0:
	raise ValueError("`results` is empty: cannot compute accuracy")

correct_predictions_case = _count_correct(results)
correct_predictions = _count_correct(results, _ignore_case)
correct_predictions_no_spaces = _count_correct(results, _ignore_case_and_spaces)
correct_predictions_no_spaces_no_Il = _count_correct(results, _ignore_case_spaces_and_Il)

accuracy_case = correct_predictions_case / len(results) * 100
accuracy = correct_predictions / len(results) * 100
accuracy_no_spaces = correct_predictions_no_spaces / len(results) * 100
accuracy_no_spaces_no_Il = correct_predictions_no_spaces_no_Il / len(results) * 100

print(f"Accuracy (case sensitive): {accuracy_case:.2f}%")
print(f"Accuracy (case insensitive): {accuracy:.2f}%")
print(f"Accuracy (case insensitive, no spaces inside): {accuracy_no_spaces:.2f}%")
print(f"Accuracy (case insensitive, no spaces inside, no Il diff): {accuracy_no_spaces_no_Il:.2f}%")


Accuracy (case sensitive): 34.73%
Accuracy (case insensitive): 50.73%
Accuracy (case insensitive, no spaces inside): 55.67%
Accuracy (case insensitive, no spaces inside, no Il diff): 59.20%


In [16]:
# Print the results whose edit distance is different from 0.
# Filter on the stored edit distance itself rather than on the truthiness of
# 'output': an empty-string prediction has a non-zero distance and must be
# listed too. Failed samples (edit_distance is None) are still skipped.
nonzero_edit_distance = [
    result for result in results
    if result['edit_distance'] is not None and result['edit_distance'] != 0
]
for r in nonzero_edit_distance:
    print(r)

{'path': '../dataset/words_dataset/Lato-Medium/76774e634770415c666b_81035.png', 'label': np.str_('vwNcGpA\\fk'), 'output': 'vwNcGpA\\)', 'edit_distance': 2}
{'path': '../dataset/words_dataset/Lato-Medium/577b32352bc3b9762b4437_74553.png', 'label': np.str_('W{25+ùv+D7'), 'output': 'W{25+ùV+D7', 'edit_distance': 1}
{'path': '../dataset/words_dataset/Lato-Medium/4f3b513b704d29213577_91954.png', 'label': np.str_('O;Q;pM)!5w'), 'output': 'o;Q;pM)!5W', 'edit_distance': 2}
{'path': '../dataset/words_dataset/Lato-Medium/56705353563b6225c2a350_53779.png', 'label': np.str_('VpSSV;b%£P'), 'output': 'vpssv;b%£P', 'edit_distance': 4}
{'path': '../dataset/words_dataset/Lato-Medium/6b552c51792c6e324449_49559.png', 'label': np.str_('kU,Qy,n2DI'), 'output': 'kU,Qy,n2Dl', 'edit_distance': 1}
{'path': '../dataset/words_dataset/Lato-Medium/71696a524d4b212664c2a3_15651.png', 'label': np.str_('qijRMK!&d£'), 'output': 'qiJRMK!&d£', 'edit_distance': 1}
{'path': '../dataset/words_dataset/Lato-Medium/2547405372