# Install and Import Modules

In [2]:
!pip install easyocr

Collecting easyocr
  Using cached easyocr-1.7.2-py3-none-any.whl.metadata (10 kB)
Collecting torch (from easyocr)
  Downloading torch-2.5.1-cp310-cp310-win_amd64.whl.metadata (28 kB)
Collecting torchvision>=0.5 (from easyocr)
  Downloading torchvision-0.20.1-cp310-cp310-win_amd64.whl.metadata (6.2 kB)
Collecting opencv-python-headless (from easyocr)
  Using cached opencv_python_headless-4.10.0.84-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting scipy (from easyocr)
  Downloading scipy-1.14.1-cp310-cp310-win_amd64.whl.metadata (60 kB)
Collecting numpy (from easyocr)
  Downloading numpy-2.2.0-cp310-cp310-win_amd64.whl.metadata (60 kB)
Collecting Pillow (from easyocr)
  Downloading pillow-11.0.0-cp310-cp310-win_amd64.whl.metadata (9.3 kB)
Collecting scikit-image (from easyocr)
  Downloading scikit_image-0.25.0-cp310-cp310-win_amd64.whl.metadata (14 kB)
Collecting python-bidi (from easyocr)
  Using cached python_bidi-0.6.3-cp310-none-win_amd64.whl.metadata (5.0 kB)
Collecting PyYAML (fr

In [14]:
!pip install pandas
!pip install datasets
!pip install -U scikit-learn scipy matplotlib

Collecting scikit-learn
  Downloading scikit_learn-1.6.0-cp310-cp310-win_amd64.whl.metadata (15 kB)
Collecting matplotlib
  Downloading matplotlib-3.10.0-cp310-cp310-win_amd64.whl.metadata (11 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Downloading joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Downloading threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Downloading contourpy-1.3.1-cp310-cp310-win_amd64.whl.metadata (5.4 kB)
Collecting cycler>=0.10 (from matplotlib)
  Using cached cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Downloading fonttools-4.55.3-cp310-cp310-win_amd64.whl.metadata (168 kB)
Collecting kiwisolver>=1.3.1 (from matplotlib)
  Downloading kiwisolver-1.4.7-cp310-cp310-win_amd64.whl.metadata (6.4 kB)
Collecting pyparsing>=2.3.1 (from matplotlib)
  Downloading pyparsing-3.2.0-py3-none-any.whl.metadata (5.0

In [56]:
!pip install kagglehub

Collecting kagglehub
  Downloading kagglehub-0.3.5-py3-none-any.whl.metadata (22 kB)
Downloading kagglehub-0.3.5-py3-none-any.whl (44 kB)
Installing collected packages: kagglehub
Successfully installed kagglehub-0.3.5


In [3]:
import easyocr
import time
import pandas as pd
import kagglehub
from typing import List
import string

reader = easyocr.Reader(["en"])
path = kagglehub.dataset_download("trainingdatapro/ocr-receipts-text-detection")

  from .autonotebook import tqdm as notebook_tqdm
Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.




# Define necessary functions

In [4]:
def normalize_text(text: str) -> str:
    return text.lower().translate(str.maketrans('', '', string.punctuation))

In [5]:
def calculate_accuracy(recognized: List[str], valid: List[str]) -> dict:
    recognized_set = {normalize_text(word) for word in recognized}
    valid_set = {normalize_text(word) for word in valid}
    
    true_positives = len(recognized_set & valid_set)
    false_positives = len(recognized_set - valid_set)
    false_negatives = len(valid_set - recognized_set)
    
    precision = true_positives / (true_positives + false_positives) if recognized_set else 0
    recall = true_positives / (true_positives + false_negatives) if valid_set else 0
    f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) else 0
    
    return {
        "precision": precision * 100,
        "recall": recall * 100,
        "f1_score": f1_score * 100,
        "accuracy": (true_positives / len(valid)) * 100 if valid else 0
    }

# Define valid data array

In [6]:
valid_data = [['BANANAS', 'FRAP'],
 ['R',
  'CARROTS',
  'SHREDDED',
  '10',
  'OZ',
  'R',
  'CUCUMBERS',
  'PERSIAN',
  '1',
  'LB',
  'TOMATOES',
  'CRUSHED',
  'NO',
  'SALT',
  'TOMATOES',
  'WHOLE',
  'NO',
  'SALT',
  'W',
  'BASIL',
  'ORGANIC',
  'OLD',
  'FASHIONED',
  'OATMEAL',
  'MINI',
  'PEARL',
  'TOMATOES',
  'PKG',
  'SHREDDED',
  'MOZZARELLA',
  'LITE',
  'T',
  'EGGS',
  '1',
  'DOZ',
  'ORGANIC',
  'BROWN',
  'BEANS',
  'GARBANZO',
  'SPROUTED',
  'CA',
  'STYLE',
  'A',
  'AVOCADOS',
  'HASS',
  'BAG',
  '4CT',
  'A',
  'APPLE',
  'BAG',
  'JAZZ',
  '2',
  'LB',
  'A',
  'PEPPER',
  'BELL',
  'EACH',
  'XL',
  'RED',
  'GROCERY',
  'NON',
  'TAXABLE',
  'BANANAS',
  'ORGANIC',
  'CREAMY',
  'SALTED',
  'PEANUT',
  'BUTTER',
  'WHL',
  'WHT',
  'PITA',
  'BREAD',
  'GROCERY',
  'NON',
  'TAXABLE'],
 ['GV', 'OATMEAL', 'OT', '200Z', 'TUM', 'M', 'ATHLETICS', 'DEXAS', '15X20'],
 ['TATER',
  'TOTS',
  'HARD',
  'PROV',
  'DC',
  'SNACK',
  'BARS',
  'HRI',
  'CL',
  'CHS',
  'HRI',
  'CL',
  'CHS',
  'HRI',
  'CL',
  'CHS',
  'HRI',
  '12',
  'U',
  'SG',
  'HRI',
  'CL',
  'PEP',
  'EARBUDS',
  'SC',
  'BCN',
  'CHDDR',
  'ABF',
  'THINBRST',
  'ABF',
  'THINBRST',
  'HARD',
  'PROV',
  'DC',
  'DV',
  'RSE',
  'OIL',
  'M',
  'APPLE',
  '3',
  'BAG',
  'STOK',
  'LT',
  'SWT',
  'PEANUT',
  'BUTTER',
  'AVO',
  'VERDE',
  'ROLLS',
  'BTS',
  'DRY',
  'BLON',
  'GALE',
  'TR',
  'HS',
  'FRM',
  '4',
  'BAGELS',
  'GV',
  'SLIDERS',
  'ACCESSORY',
  'CHEEZE',
  'IT',
  'RITZ',
  'RUFFLES',
  'GV',
  'HNY',
  'GRMS'],
 ['SW', 'HRO', 'FGHTR'],
 ['KTTL',
  'SEA',
  'SALT',
  'POT',
  'CHP',
  'BRAIDED',
  'BRIOCHE',
  'CHEF',
  'PLATE',
  'MEAL'],
 ['Woman',
  'Ham',
  'Cheese',
  'Ice',
  'Java',
  'Tea',
  'Mineral',
  'Water',
  'Black',
  '&',
  'White'],
 ['6', 'WING', 'PLATE', 'ASST', '27', 'CUTIE', 'CAR'],
 ['FF',
  'BS',
  'BREAST',
  'KS',
  'DICED',
  'TOM',
  'JACKORGSALSE',
  '18CT',
  'EGGS',
  'GRAPE',
  'TOMATO',
  'ECO',
  'HALF',
  'PAN',
  'GRND',
  'TURKEY',
  'CHPD',
  'ONION',
  'MONT',
  'JACK',
  '2#'],
 ['N',
  'YORK',
  'TX',
  'GRLC',
  'CHICKEN',
  'WINGS',
  'PICSWEET',
  'BLEND',
  'PICSWEET',
  'GREENS',
  'CHICKEN',
  'WINGS',
  'DIET',
  'LN',
  'GRND',
  'B',
  'PORK',
  'LOIN',
  'CHOP',
  'DRUMSTICKS',
  'EYE',
  'RND',
  'STK',
  'FP',
  'EYE',
  'RND',
  'STK',
  'FP',
  'BEEF',
  'FOR',
  'STEW',
  'PORK',
  '1',
  '2',
  'LOIN',
  'B',
  'BEST',
  'S',
  'STEAK',
  'BAR',
  'S',
  'MEAT',
  'FRNK',
  '0',
  'M',
  'HAM',
  'WINCO',
  'NOODLES',
  'TNDRBRD',
  'CHIX',
  'BR',
  'KRFT',
  'DELUXE',
  'MAC',
  'EGGO',
  'WAFFLE',
  'L',
  'D',
  'NUTTY',
  'BARS',
  'L',
  'D',
  'OATMEAL',
  'CRM',
  'LINKS',
  'MILD',
  'KEEB',
  'TOWNHOUSE',
  'TURKEY',
  'FRANK',
  'RESERS',
  'POT',
  'SLD',
  'DM',
  'SPAG',
  'SAUCE',
  'HUNTS',
  'MANWICH',
  'DM',
  'PNAPL',
  'CHNKY',
  'SUNBEAN',
  'BUNS',
  'WINCO',
  'SNDWCH',
  'WT',
  'WINCO',
  'HOMO',
  'MILK',
  'EGGS'],
 ['LAZENBY',
  'WORCESTER',
  'SAUCE',
  'MILKY',
  'BAR',
  'CHOC',
  'SMOKED',
  'VIENNAS',
  'PEALED',
  'PEACHES',
  'MEDITERRANEAN',
  'MIX',
  'SPAR',
  'COOKING',
  'OIL',
  'F',
  'L',
  'ENGLISH',
  'CUCUMB',
  'NESTLE',
  'AERO',
  'CADBURY',
  'DAIRY',
  'MI',
  'GRAPES',
  'MIXED',
  'TUB',
  'TASTIC',
  'RICE',
  'BLACK',
  'CAT',
  'SMOOTH',
  'CARRIER',
  'BAG',
  '24L',
  'BANANAS',
  'LOOSE'],
 ['PL',
  "TORTILLA'S",
  'CAGE',
  'FREE',
  'ALL',
  'WHIT',
  'BLACK',
  'BEANS',
  'Frozen',
  'Mangoes',
  '16o',
  'Whole',
  'Strawberries',
  'OG',
  'LF',
  'COTTAGE',
  'CHEE',
  'MAHI',
  'MAHI',
  'FILLETS',
  '$2',
  'off',
  '(1)',
  'WC',
  'Fill',
  'California',
  'Harvest',
  'PLUMS',
  'BLACK',
  'CV'],
 ['SW', 'FIGURES', 'TOOTHBRUSH', 'WOMEN', 'SLIPPE'],
 ['GIFT', 'CARD'],
 ['EQUATE',
  'LINER',
  'VFUS',
  'ENG',
  'POM',
  'DAWN',
  'ORIG',
  '50YD',
  'PKGTAPE',
  '50YD',
  'PKGTAPE',
  'LINT',
  'POLLER2'],
 ['DIABETES'],
 ['6', 'WING', 'PLATE', 'ASST', '27', 'CUTIE', 'CAR'],
 ['BANANAS',
  'BEVERAGE',
  'OS',
  'CRAN',
  'POM',
  'STRWBRY',
  'CC',
  'CAMPARI',
  'TOM',
  'KFT',
  'SINGLES',
  'HARD',
  'SALAMI',
  'AVOCADO',
  'PILLS',
  'WHITE',
  'SH',
  'NYLON',
  'COL',
  'HAND',
  'CLEANER',
  'INJECTR',
  'CLNR'],
 ['GV',
  'WATER',
  'GV',
  'WATER',
  'GREAT',
  'VALUE',
  'GREAT',
  'VALUE',
  'GAIN',
  'LFE',
  'GAIN',
  'GV',
  'VEG',
  'OIL',
  'SNICKER',
  'FS',
  'GDBR',
  'FS',
  '6PK',
  'MIXED',
  'CHOC',
  'CHERRIES',
  'GAIN',
  'HY',
  'PSTLEG',
  'PM',
  'BATTERIES',
  'GV',
  '20Z',
  'MINI',
  'CRNBRY',
  'SAUCE',
  'SPREADS',
  'EGGS',
  '6CT'],
 ['GRILL', 'COVER', 'FIBER', 'CHOICE', 'CELERY', 'HEART', 'RED', 'GRAPE']]

# Recognize dataset images. Output metricies

In [7]:
for i in range(len(valid_data)):
    start_time = time.perf_counter()
    
    image_path = f'{path}/images/{i}.jpg'
    result = reader.readtext(image_path, detail=0)
    metrics = calculate_accuracy(result, valid_data[i])
    
    end_time = time.perf_counter()
    print(f'Image {i}:')
    print(f"  Precision: {metrics['precision']:.2f}%")
    print(f"  Recall: {metrics['recall']:.2f}%")
    print(f"  F1-Score: {metrics['f1_score']:.2f}%")
    print(f"  Accuracy: {metrics['accuracy']:.2f}%")
    print(f"  Time: {end_time - start_time}")
    print('-' * 30)

Image 0:
  Precision: 2.08%
  Recall: 50.00%
  F1-Score: 4.00%
  Accuracy: 50.00%
  Time: 4.495080200023949
------------------------------
Image 1:
  Precision: 0.00%
  Recall: 0.00%
  F1-Score: 0.00%
  Accuracy: 0.00%
  Time: 4.798374000005424
------------------------------
Image 2:
  Precision: 0.00%
  Recall: 0.00%
  F1-Score: 0.00%
  Accuracy: 0.00%
  Time: 3.677039199974388
------------------------------
Image 3:
  Precision: 10.23%
  Recall: 16.98%
  F1-Score: 12.77%
  Accuracy: 13.24%
  Time: 5.329171699937433
------------------------------
Image 4:
  Precision: 0.00%
  Recall: 0.00%
  F1-Score: 0.00%
  Accuracy: 0.00%
  Time: 2.1925840000621974
------------------------------
Image 5:
  Precision: 5.81%
  Recall: 50.00%
  F1-Score: 10.42%
  Accuracy: 50.00%
  Time: 19.386110199964605
------------------------------
Image 6:
  Precision: 6.38%
  Recall: 27.27%
  F1-Score: 10.34%
  Accuracy: 27.27%
  Time: 9.060395799926482
------------------------------
Image 7:
  Precision: 0.00%