#**Install neccesary libraries**

In [None]:
!pip install pytesseract
!pip install sentence-transformers
!pip install faiss-cpu
!pip install pytesseract
!apt-get install -y tesseract-ocr
!pip install easyocr

#**Lightweight Image to text code**

In [46]:
import requests
from PIL import Image
from io import BytesIO
import pytesseract

# Function to download the image from a URL
def get_image_from_url(url):
    response = requests.get(url)
    img = Image.open(BytesIO(response.content))
    return img

# Example image URL
image_url = 'https://m.media-amazon.com/images/I/318TVw4iM1L.jpg'

# Download and open the image
image = get_image_from_url(image_url)

# Use Tesseract to extract text from the image
text = pytesseract.image_to_string(image)

# Print the extracted text
print("Extracted Text:")
print(text)


Extracted Text:
 



#**Heavyweight**


In [None]:
import requests
from PIL import Image, ImageEnhance, ImageFilter
from io import BytesIO
import easyocr

# Enable GPU for faster performance in Colab
reader = easyocr.Reader(['en'], gpu=True)

# Function to download the image from a URL
def get_image_from_url(url):
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        img = Image.open(BytesIO(response.content))
        return img
    except requests.exceptions.RequestException as e:
        print(f"Error fetching image: {e}")
        return None

# Function to preprocess the image for faster OCR
def preprocess_image(image):
    image = image.convert("L")  # Convert to grayscale
    image = image.filter(ImageFilter.MedianFilter())  # Light median filter for noise reduction
    return image

# Resize image to a max width (optional)
def resize_image(image, max_width=1000):
    width_percent = max_width / float(image.size[0])
    height_size = int(float(image.size[1]) * width_percent)
    return image.resize((max_width, height_size), Image.ANTIALIAS)

# Example image URL
image_url = 'https://m.media-amazon.com/images/I/81IYdOV0mVL.jpg'

# Download and open the image
image = get_image_from_url(image_url)

if image:
    # Preprocess and resize the image
    image = preprocess_image(image)
    image = resize_image(image)

    # Save the processed image
    image.save('image.jpg')

    # Run EasyOCR on the processed image with GPU enabled
    result = reader.readtext('image.jpg', detail=0)  # Set detail=0 for faster extraction

    # Print the extracted text
    print("Extracted Text:")
    print(' '.join(result))
else:
    print("Failed to download or process the image.")


  return image.resize((max_width, height_size), Image.ANTIALIAS)


#**Transformer**

In [22]:
import re
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# Step 1: Initialize the Sentence Transformer model
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Step 2: Define entities and their associated keywords/units
entities = ["weight", "dimensions", "volume", "voltage"]
units = ["kg", "gram", "cm", "inch", "litre", "ml", "volt"]

# Step 3: Encode entities into vectors using the sentence transformer
entity_embeddings = model.encode(entities)

# Step 4: Set up a FAISS index for vector similarity search
dimension = entity_embeddings.shape[1]  # Embedding size (typically 768 for this model)
index = faiss.IndexFlatL2(dimension)  # L2 distance index
index.add(entity_embeddings)  # Add entity embeddings to the FAISS index

# Function to find the closest entity using vector search
def find_closest_entity(entity_name):
    entity_embedding = model.encode([entity_name])
    distances, indices = index.search(entity_embedding, 1)  # Top 1 closest result
    return entities[indices[0][0]]  # Return the closest entity

# Function to extract number + unit pattern from the text
def extract_value(text, unit_list):
    # Build a regex pattern that looks for a number followed by a unit
    pattern = r"(\d+(\.\d+)?)\s?(" + "|".join(unit_list) + ")"
    match = re.search(pattern, text)
    if match:
        return match.group(0)  # Return the full match (e.g., "500 kg")
    return None

# Main function to process the text and find the entity-value pair
def get_entity_value(entity_name, text):
    # Find the closest entity using vector search
    closest_entity = find_closest_entity(entity_name)

    # Extract the value associated with the entity
    value = extract_value(text, units)

    # Return the result as a dictionary or string
    if value:
        return f"{closest_entity} = {value}"
    else:
        return f"No value found for {closest_entity}"

# Example usage
entity = "weight"
text = "the item weighs 500 kg"
result = get_entity_value(entity, text)

print(result)


ModuleNotFoundError: No module named 'sentence_transformers'

#**Testing1**

In [39]:
import requests
from PIL import Image, ImageEnhance, ImageFilter
from io import BytesIO
import pytesseract
import re
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# Step 1: Initialize the Sentence Transformer model
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Step 2: Define entities and their associated keywords/units
entities = ["weight", "dimensions", "volume", "voltage"]
units = ["kg", "gram", "cm", "inch", "litre", "ml", "volt", "v"]

# Step 3: Encode entities into vectors using the sentence transformer
entity_embeddings = model.encode(entities)

# Step 4: Set up a FAISS index for vector similarity search
dimension = entity_embeddings.shape[1]  # Embedding size
index = faiss.IndexFlatL2(dimension)  # L2 distance index
index.add(entity_embeddings)  # Add entity embeddings to the FAISS index

# Function to find the closest entity using vector search
def find_closest_entity(entity_name):
    entity_embedding = model.encode([entity_name])
    distances, indices = index.search(entity_embedding, 1)  # Top 1 closest result
    return entities[indices[0][0]]  # Return the closest entity

# Function to extract number + unit pattern from the text
def extract_value(text, unit_list):
    # Refined regex pattern to handle multiple cases
    pattern = r"(\d+(\.\d+)?)\s?([" + "|".join(unit_list) + r"])"
    match = re.search(pattern, text, re.IGNORECASE)  # Added case insensitivity
    if match:
        return match.group(0)  # Return the full match (e.g., "500 kg", "3.7v")
    return None

# Main function to process the text and find the entity-value pair
def get_entity_value(entity_name, text):
    # Find the closest entity using vector search
    closest_entity = find_closest_entity(entity_name)

    # Extract the value associated with the entity
    value = extract_value(text, units)

    # Return the result as a dictionary or string
    if value:
        return f"{closest_entity} = {value}"
    else:
        return f"No value found for {closest_entity}"

# Step 5: Function to download and preprocess the image
def get_image_from_url(url):
    response = requests.get(url)
    img = Image.open(BytesIO(response.content))

    # Preprocess the image for better OCR
    img = preprocess_image(img)
    return img

# Preprocessing function to improve OCR accuracy
def preprocess_image(image):
    # Convert to grayscale
    image = image.convert("L")

    # Apply thresholding to increase contrast
    image = image.point(lambda p: p > 128 and 255)  # Simple thresholding

    # Optionally, apply a slight blur to remove noise
    image = image.filter(ImageFilter.MedianFilter(size=3))

    return image

# Step 6: Main function to extract text from image and process entity-value extraction
def process_image(image_url, entity_name):
    # Download and open the image
    image = get_image_from_url(image_url)

    # Use Tesseract to extract text from the image
    text = pytesseract.image_to_string(image)

    # Print the extracted text for debugging
    print("Extracted Text:")
    print(text)

    # Process the text to find entity-value pair
    result = get_entity_value(entity_name, text)

    # Print the final result
    print(f"Result for '{entity_name}': {result}")

# Example usage:
image_url = 'https://m.media-amazon.com/images/I/21vv80MKQEL.jpg'
entity = "volt"
process_image(image_url, entity)


Extracted Text:
 

Result for 'volt': No value found for voltage


#**Testing2**

In [7]:
import requests
from PIL import Image, ImageEnhance, ImageFilter
from io import BytesIO
import pytesseract
import re
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# Step 1: Initialize the Sentence Transformer model
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Step 2: Define entities and their associated keywords/units
entities = ["weight", "dimensions", "volume", "voltage"]
units = ["kg", "gram", "cm", "inch", "litre", "ml", "volt", "v", "oz"]

# Step 3: Encode entities into vectors using the sentence transformer
entity_embeddings = model.encode(entities)

# Step 4: Set up a FAISS index for vector similarity search
dimension = entity_embeddings.shape[1]  # Embedding size
index = faiss.IndexFlatL2(dimension)  # L2 distance index
index.add(entity_embeddings)  # Add entity embeddings to the FAISS index

# Function to find the closest entity using vector search
def find_closest_entity(entity_name):
    entity_embedding = model.encode([entity_name])
    distances, indices = index.search(entity_embedding, 1)  # Top 1 closest result
    return entities[indices[0][0]]  # Return the closest entity

# Function to extract number + unit pattern from the text
def extract_value(text, unit_list):
    # Refined regex pattern to handle multiple cases
    pattern = r"(\d+(\.\d+)?)\s?([" + "|".join(unit_list) + r"])"
    match = re.search(pattern, text, re.IGNORECASE)  # Added case insensitivity
    if match:
        return match.group(0)  # Return the full match (e.g., "500 kg", "3.7v")
    return None

# Main function to process the text and find the entity-value pair
def get_entity_value(entity_name, text):
    # Find the closest entity using vector search
    closest_entity = find_closest_entity(entity_name)

    # Extract the value associated with the entity
    value = extract_value(text, units)

    # Return the result as a dictionary or string
    if value:
        return f"{closest_entity} = {value}"
    else:
        return f"No value found for {closest_entity}"

# Preprocessing function to improve OCR accuracy
def preprocess_image(image):
    # Convert to grayscale
    image = image.convert("L")

    # Apply thresholding to increase contrast
    image = image.point(lambda p: p > 128 and 255)  # Simple thresholding

    # Optionally, apply a slight blur to remove noise
    image = image.filter(ImageFilter.MedianFilter(size=3))

    return image

# Step 5: Function to download the image from a URL
def get_image_from_url(url):
    response = requests.get(url)
    img = Image.open(BytesIO(response.content))

    # Preprocess the image for better OCR
    img = preprocess_image(img)
    return img

# Step 6: Main function to extract text from image and process entity-value extraction
def process_image(image_url, entity_name):
    # Download and open the image
    image = get_image_from_url(image_url)

    # Use Tesseract to extract text from the image
    text = pytesseract.image_to_string(image)

    # Print the extracted text for debugging
    print("Extracted Text:")
    print(text)

    # Process the text to find entity-value pair
    result = get_entity_value(entity_name, text)

    # Print the final result
    print(f"Result for '{entity_name}': {result}")

# Example usage:
image_url = 'https://m.media-amazon.com/images/I/3131mkESkQL.jpg'
entity = "oz"
process_image(image_url, entity)




Extracted Text:
 

Result for 'oz': No value found for weight
