In [5]:
import easyocr
import cv2
from tqdm import tqdm
import os
import csv
import re

# Shared EasyOCR reader, built once at module level and reused by
# extract_text_from_image for every image. It is configured with the language
# list ['en'] and gpu=True; the recorded output of this cell shows EasyOCR
# falling back to CPU when neither CUDA nor MPS is available.
reader = easyocr.Reader(['en'], gpu=True)

def extract_text_from_image(image_path):
    """Run OCR on the image at ``image_path`` and return the recognized text.

    The image is loaded with ``cv2.imread`` and passed to the module-level
    ``reader``. Element 1 of every result returned by ``readtext`` is joined
    with single spaces, in the order the results were returned.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The joined text, or an empty string when nothing was detected.

    Raises:
        ValueError: If the file cannot be loaded as an image.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread does not raise on a missing or undecodable file; it
        # returns None. Fail here with the offending path instead of letting
        # the OCR call produce a less informative error.
        raise ValueError(f"Could not read image: {image_path}")
    detections = reader.readtext(image)
    return ' '.join(detection[1] for detection in detections)

# Abbreviation -> canonical unit name used in the prediction string.
_UNIT_ABBREVIATIONS = {
    'g': 'gram', 'cm': 'centimetre', 'oz': 'ounce', 'kg': 'kilogram',
    'ft': 'foot', 'in': 'inch', 'm': 'metre', 'mm': 'millimetre',
    't': 'ton', 'v': 'volt', 'w': 'watt', 'kv': 'kilovolt',
    'kw': 'kilowatt', 'lb': 'pound', 'yd': 'yard', 'mv': 'millivolt',
    '\u03bcg': 'microgram',  # Greek small letter mu
    '\u00b5g': 'microgram',  # micro sign, a distinct code point that looks the same
}

# Every spelling the pattern accepts, longest first. Regex alternation takes
# the first alternative that matches, so a short unit such as 'm' must come
# after 'millimetre', 'mm', 'millivolt', 'mv' and 'microgram'; otherwise
# those are all reported as metres.
_UNIT_ALTERNATIVES = sorted(
    set(_UNIT_ABBREVIATIONS) | set(_UNIT_ABBREVIATIONS.values()),
    key=lambda unit: (-len(unit), unit),
)

# A non-negative integer or decimal number, optional whitespace, then a unit.
_MEASUREMENT_RE = re.compile(
    r'(\d+(?:\.\d+)?)\s*('
    + '|'.join(re.escape(unit) for unit in _UNIT_ALTERNATIVES)
    + r')'
)


def parse_measurement(text):
    """Extract the first "<number> <unit>" measurement found in ``text``.

    Matching is case-insensitive (the text is lower-cased first). Abbreviated
    units are expanded to their full names and the number is formatted as a
    float, e.g. ``"Net wt 500g"`` -> ``"500.0 gram"``.

    Args:
        text: Free-form text, typically OCR output.

    Returns:
        ``"<float> <unit name>"`` for the first match, or ``""`` when the
        text is empty/None or contains no recognizable measurement.
    """
    if not text:
        return ""
    match = _MEASUREMENT_RE.search(text.lower())
    if match is None:
        return ""
    value, unit = match.groups()
    # Full unit names are not keys of the table and pass through unchanged.
    unit = _UNIT_ABBREVIATIONS.get(unit, unit)
    return f"{float(value)} {unit}"

# Folder whose entries are run through OCR, and the CSV the results go to.
image_folder = '../images'
output_file = 'predictions.csv'

# Write one row per directory entry: a running index and the parsed
# measurement ("" when nothing was recognized or processing failed).
with open(output_file, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['index', 'prediction'])

    # os.listdir returns entries in arbitrary order; sort them so the index
    # assigned to each file is the same on every run and every machine.
    image_files = sorted(os.listdir(image_folder))

    for index, file in enumerate(tqdm(image_files)):
        image_path = os.path.join(image_folder, file)
        try:
            extracted_text = extract_text_from_image(image_path)
            prediction = parse_measurement(extracted_text)
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole
            # batch. Report it and still emit a row so indices stay aligned.
            print(f"Error processing {file}: {str(e)}")
            prediction = ""
        writer.writerow([index, prediction])

print(f"CSV file '{output_file}' has been created with the predictions.")

Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.
100%|██████████| 54/54 [09:40<00:00, 10.76s/it]

CSV file 'predictions.csv' has been created with the predictions.



