# In [52]:
from PIL import Image, ImageEnhance, ImageFilter, ImageOps
import pytesseract
import re
import os

# Location of the Tesseract executable used by pytesseract. The TESSERACT_CMD
# environment variable, when set, overrides the machine-specific default so
# the script can run on another machine without editing the source.
pytesseract.pytesseract.tesseract_cmd = os.environ.get(
    'TESSERACT_CMD',
    r'C:\Users\AW581PP\AppData\Local\Programs\Tesseract-OCR\tesseract.exe',
)


def preprocess_image(image_path):
    """Load an image and prepare it for OCR.

    The image is converted to grayscale, its contrast is enhanced with a
    factor of 3, a sharpen filter is applied, and the pixel values are
    inverted.

    Args:
        image_path: Path of the image file to open with ``PIL.Image.open``.

    Returns:
        The preprocessed image produced by ``ImageOps.invert``.
    """
    # Image.open keeps the file handle open; the context manager releases it
    # deterministically. convert() returns an independent image, so the
    # result stays usable after the source file has been closed.
    with Image.open(image_path) as image:
        gray_image = image.convert('L')
    enhancer = ImageEnhance.Contrast(gray_image)
    contrast_image = enhancer.enhance(3)
    sharpened_image = contrast_image.filter(ImageFilter.SHARPEN)
    inverted_image = ImageOps.invert(sharpened_image)
    return inverted_image

def ocr_and_extract_number(image):
    """Run Tesseract on *image* and return every run of digits in its text.

    Args:
        image: Image to pass to ``pytesseract.image_to_string``.

    Returns:
        A list of digit strings in the order they appear in the OCR output;
        the list is empty when no digits are recognised.
    """
    # Engine mode 3, page segmentation mode 6, plus the ``digits`` config
    # file. No ``outputbase`` token is needed: pytesseract passes the output
    # base itself, and any extra bare word in the config string is handed to
    # Tesseract as the name of a config file.
    custom_config = r'--oem 3 --psm 6 digits'
    text = pytesseract.image_to_string(image, config=custom_config)
    return re.findall(r'\d+', text)


def crop_to_numbers(image, save_path):
    """Crop the three regions of *image* that are OCR'd for numbers.

    Every region is expressed as a fraction of the image size, so the same
    layout applies regardless of resolution:

    * ``altura`` - right 25 % of the width, top 20 % of the height.
    * ``largo``  - right 34 % of the width, bottom 20 % of the height.
    * ``ancho``  - from 5 % to 30 % of the width, bottom 15 % of the height.

    Each crop is also written to ``<save_path>/<name>_crop.png``.

    Args:
        image: Image exposing ``size``, ``crop(box)`` and, on the cropped
            result, ``save(path)`` (e.g. a ``PIL.Image.Image``).
        save_path: Directory that receives the crop files; it is created
            if it does not exist yet.

    Returns:
        A dict mapping region name to the cropped image.
    """
    width, height = image.size
    # Boxes are (left, upper, right, lower). 'altura' is derived from the
    # actual image size like the other two regions rather than from fixed
    # 312x239 dimensions, which only matched images of exactly that size.
    crops = {
        'altura': (int(width * 0.75), 0, width, int(height * 0.2)),
        'largo': (int(width * 0.66), int(height * 0.8), width, height),
        'ancho': (int(width * 0.05), int(height * 0.85), int(width * 0.3), height),
    }

    # Make the function usable on its own, without relying on the caller
    # having created the output directory first.
    os.makedirs(save_path, exist_ok=True)

    cropped_images = {}
    for side, coords in crops.items():
        cropped_image = image.crop(coords)
        cropped_images[side] = cropped_image
        cropped_image.save(os.path.join(save_path, f'{side}_crop.png'))
    return cropped_images


def extract_numbers_from_image(image_path, save_path):
    """Preprocess an image, crop its regions, and OCR each crop.

    Args:
        image_path: Path of the image handed to ``preprocess_image``.
        save_path: Directory handed to ``crop_to_numbers`` for the crop files.

    Returns:
        A dict mapping each region name returned by ``crop_to_numbers`` to
        the list of digit strings ``ocr_and_extract_number`` found in it.
    """
    regions = crop_to_numbers(preprocess_image(image_path), save_path)
    return {
        label: ocr_and_extract_number(region)
        for label, region in regions.items()
    }


# Input image and the directory where the cropped regions are saved.
image_path = 'extracted_image.png'
save_path = 'path_to_save_crops'

# exist_ok=True replaces the separate os.path.exists() test, which is a
# check-then-create sequence that can race with other processes.
os.makedirs(save_path, exist_ok=True)

extracted_numbers = extract_numbers_from_image(image_path, save_path)
print(extracted_numbers)


# Output: {'altura': [], 'largo': ['1219'], 'ancho': ['1016']}
