In [5]:
import cv2
import pytesseract
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np

pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

def get_text_order(image_path, font_size, font_type):
    # Load the image
    image = cv2.imread(image_path)

    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Apply thresholding to preprocess the image
    _, threshold = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Create a kernel for morphological operations
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))

    # Perform morphological operations to enhance text extraction
    morphed = cv2.morphologyEx(threshold, cv2.MORPH_CLOSE, kernel, iterations=2)

    # Find contours in the image
    contours, _ = cv2.findContours(morphed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Sort the contours based on their bounding box coordinates
    contours = sorted(contours, key=lambda x: cv2.boundingRect(x)[0])

    # Initialize a list to store the text and its corresponding bounding boxes
    text_boxes = []

    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)

        # Extract the region of interest (ROI) containing the text
        roi = gray[y:y + h, x:x + w]

        # Convert the ROI to an image object
        pil_roi = Image.fromarray(roi)

        # Perform OCR using Tesseract to extract the text
        text = pytesseract.image_to_string(pil_roi)

        # Store the text and its bounding box coordinates
        text_boxes.append((text, x, y, w, h))

    # Filter text based on font size and type
    filtered_text_boxes = [
        (text, x, y, w, h) for text, x, y, w, h in text_boxes if get_font_size(image_path, x, y, w, h) == font_size and get_font_type(image_path, x, y, w, h) == font_type
    ]

    # Sort the filtered text boxes based on their y-coordinate (top to bottom)
    sorted_text_boxes = sorted(filtered_text_boxes, key=lambda x: x[2])

    # Print the order of the text
    for i, (text, _, _, _, _) in enumerate(sorted_text_boxes):
        print(f"User looks at text {i + 1}: {text}")

def get_font_size(image_path, x, y, w, h):
    # Load the image
    image = cv2.imread(image_path)

    # Calculate the average pixel intensity within the bounding box
    roi = image[y:y + h, x:x + w]
    average_intensity = np.mean(roi)

    # Determine the font size based on average intensity (adjust these thresholds as needed)
    if average_intensity < 100:
        font_size = "Small"
    elif average_intensity < 150:
        font_size = "Medium"
    else:
        font_size = "Large"

    return font_size

def get_font_type(image_path, x, y, w, h):
    # Load the image
    image = cv2.imread(image_path)

    # Calculate the average intensity within the bounding box
    roi = image[y:y + h, x:x + w]
    average_intensity = np.mean(roi)

    # Determine the font type based on average intensity (adjust these thresholds as needed)
    if average_intensity < 100:
        font_type = "Sans-serif"
    elif average_intensity < 150:
        font_type = "Serif"
    else:
        font_type = "Handwriting"

    return font_type

# Provide the path to the image
image_path = "text.png"

# Specify the font size and type you want to analyze
font_size = "Medium"
font_type = "Sans-serif"

# Call the function to get the text order
print(get_text_order(image_path, font_size, font_type))


None


In [6]:
import cv2
import numpy as np
from matplotlib import pyplot as plt
import matplotlib.cm as cm

def calculate_saliency_map(image_path):
    # Load the image
    image = cv2.imread(image_path)

    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Create a saliency object
    saliency = cv2.saliency.StaticSaliencyFineGrained_create()

    # Calculate the saliency map
    _, saliency_map = saliency.computeSaliency(gray)

    # Normalize the saliency map to have values between 0 and 1
    saliency_map = (saliency_map - np.min(saliency_map)) / (np.max(saliency_map) - np.min(saliency_map))

    return saliency_map

def visualize_saliency_map(image_path, saliency_map):
    # Load the image
    image = cv2.imread(image_path)

    # Resize the saliency map to match the image size
    saliency_map = cv2.resize(saliency_map, (image.shape[1], image.shape[0]))

    # Apply a colormap to the saliency map
    saliency_map_colored = cm.jet(saliency_map)

    # Overlay the saliency map on the image
    saliency_overlay = cv2.addWeighted(image, 0.7, saliency_map_colored, 0.3, 0)

    # Display the image with the saliency map
    fig, ax = plt.subplots(figsize=(10, 8))
    ax.imshow(cv2.cvtColor(saliency_overlay, cv2.COLOR_BGR2RGB))
    ax.axis('off')
    plt.show()

# Provide the path to the image
image_path = "text.png"

# Calculate the saliency map
saliency_map = calculate_saliency_map(image_path)

# Visualize the saliency map overlaid on the image
visualize_saliency_map(image_path, saliency_map)


AttributeError: module 'cv2' has no attribute 'saliency'