In [1]:
import re
import pytesseract
from PIL import Image


In [2]:

# Function to extract text from an image using pytesseract OCR
def extract_text_from_image(image_path):
    """Run Tesseract OCR on an image file and return the recognized text.

    Args:
        image_path: Location of the image, passed straight to ``Image.open``.

    Returns:
        The string produced by ``pytesseract.image_to_string``, or an empty
        string if opening or processing the image raised an exception.
    """
    try:
        # Use the image as a context manager so its underlying file handle is
        # released as soon as OCR finishes, rather than lingering until
        # garbage collection (matters when looping over many images).
        with Image.open(image_path) as image:
            return pytesseract.image_to_string(image)
    except Exception as e:
        # Deliberate best-effort: report the failure and fall back to "".
        print(f"Error opening or processing image: {e}")
        return ""

# Function to infer entity values (e.g., width, height) from the OCR-extracted text
def infer_entity_values_from_text(text):
    """Infer dimension entities from OCR-extracted text.

    Scans ``text`` for a number followed by one of the units cm, mm, kg, m
    or a lone "C" (case-insensitive). Only centimetre readings are mapped to
    entities, in order of appearance: the first becomes "width", the second
    "height", and every later one is stored under "depth". Readings in the
    other units are matched but not assigned to any entity.

    Args:
        text: OCR output to scan. ``None`` or an empty string yields ``{}``.

    Returns:
        dict mapping entity name ("width", "height", "depth") to a string of
        the form "<number> cm".
    """
    extracted_values = {}

    # Nothing to scan (e.g. OCR failed and produced no text).
    if not text:
        return extracted_values

    # The number part accepts an optional decimal fraction so that "20.5 cm"
    # is read as 20.5 instead of only the trailing "5".
    pattern = r'(\d+(?:\.\d+)?)\s*(cm|mm|kg|m|C)'

    for value, unit in re.findall(pattern, text, re.IGNORECASE):
        unit = unit.lower()

        # A lone "c" is treated as an OCR misread of "cm"; all other units
        # are currently ignored.
        if unit not in ('cm', 'c'):
            continue

        if "width" not in extracted_values:
            extracted_values["width"] = f"{value} cm"
        elif "height" not in extracted_values:
            extracted_values["height"] = f"{value} cm"
        else:
            # NOTE(review): every reading past the second overwrites "depth",
            # so with 4+ cm values depth ends up as the last one — confirm
            # that this is intended.
            extracted_values["depth"] = f"{value} cm"

    return extracted_values

# Complete process function: loads image, extracts text, and infers values
def process_image_for_entity_values(image_path, expected_entity):
    """OCR an image and return the inferred value for a single entity.

    Args:
        image_path: Location of the image to run OCR on.
        expected_entity: Key to look up among the inferred values
            (e.g. "width").

    Returns:
        The inferred value for ``expected_entity``, or "" when nothing was
        inferred for it.
    """
    ocr_text = extract_text_from_image(image_path)
    values_by_entity = infer_entity_values_from_text(ocr_text)

    # Fall back to a blank string when the requested entity is absent.
    if expected_entity in values_by_entity:
        return values_by_entity[expected_entity]
    return ""


In [3]:

# Sample run: set the image to read and the entity to pull out of it.
# NOTE(review): the path below is an absolute, machine-specific location —
# point it at your own copy of the image before running elsewhere.
image_path = '/Users/kaustubh/Downloads/student_resource 3/images/41-NCxNuBxL.jpg'
expected_entity = 'width'  # one of 'width', 'height' or 'depth'

# OCR the image and look up the requested entity.
extracted_value = process_image_for_entity_values(
    image_path=image_path,
    expected_entity=expected_entity,
)

print(f"Extracted value for {expected_entity}: {extracted_value}")


Extracted value for width: 20 cm
