In [2]:
import cv2
import numpy as np

def auto_align_image(image):
    """Deskew an image by rotating it by the median angle of its detected lines.

    Edges are found with Canny (thresholds derived from the median gray
    level), line segments with the probabilistic Hough transform, and the
    image is rotated about its centre by the median angle of the segments
    that pass the length/orientation filter. Areas uncovered by the rotation
    are filled with white.

    Args:
        image: Image array that ``cv2.cvtColor`` can convert with
            ``cv2.COLOR_BGR2GRAY``.

    Returns:
        The rotated image (same width and height as the input), or the input
        ``image`` object itself when no segments are detected or none pass
        the filter. A message is printed in both fallback cases.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Canny thresholds sit 33% below/above the median intensity, clamped to 0..255.
    median_intensity = np.median(gray)
    lower_threshold = int(max(0, (1.0 - 0.33) * median_intensity))
    upper_threshold = int(min(255, (1.0 + 0.33) * median_intensity))
    edges = cv2.Canny(gray, lower_threshold, upper_threshold)

    # Hough parameters scale with the largest image dimension.
    largest_dim = max(image.shape)
    min_line_length = max(50, int(0.1 * largest_dim))
    max_line_gap = max(10, int(0.05 * largest_dim))
    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 100,
                            minLineLength=min_line_length, maxLineGap=max_line_gap)

    if lines is None:
        print("No lines detected.")
        return image

    # Collect one angle per segment, keeping only long segments that are not
    # nearly vertical. The angle is computed inside the loop so each kept
    # segment contributes its own value.
    angles = []
    for line in lines:
        x1, y1, x2, y2 = line[0]
        # Work in floats so squaring large pixel deltas cannot overflow
        # a fixed-width integer type.
        dx = float(x2) - float(x1)
        dy = float(y2) - float(y1)
        length = np.hypot(dx, dy)
        angle = np.degrees(np.arctan2(dy, dx))
        if length > 100 and not (80 < abs(angle) < 100):
            # A segment has no direction: fold the angle into [-90, 90) so
            # that e.g. 178 and -2 degrees count as the same orientation.
            angles.append((angle + 90.0) % 180.0 - 90.0)

    if not angles:
        print("No valid angles found.")
        return image

    median_angle = float(np.median(angles))

    # Rotate about the image centre; keep the original canvas size.
    height, width = image.shape[:2]
    center = (width // 2, height // 2)
    rotation_matrix = cv2.getRotationMatrix2D(center, median_angle, 1.0)
    return cv2.warpAffine(image, rotation_matrix, (width, height),
                          borderMode=cv2.BORDER_CONSTANT, borderValue=(255, 255, 255))

if __name__ == '__main__':
    # Specify the path to the image
    image_path = "5.png"

    # Read image to be aligned (cv2.imread returns None on failure instead of raising)
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)

    # Check if image was successfully loaded
    if image is None:
        print(f"Error: Could not load image at {image_path}")
    else:
        # Align the image
        aligned_image = auto_align_image(image)

        # Save the aligned image. cv2.imwrite signals failure through its
        # boolean return value, so only report success when the file was
        # actually written.
        output_path = "aligned_image.jpg"
        if cv2.imwrite(output_path, aligned_image):
            print(f"Aligned image saved as {output_path}")
        else:
            print(f"Error: Could not save image to {output_path}")

Aligned image saved as aligned_image.jpg


In [11]:
import cv2
import numpy as np
import pytesseract
from PIL import Image

def auto_align_image(image):
    """Deskew an image by rotating it by the median angle of its detected lines.

    Edges are found with Canny (thresholds derived from the median gray
    level), line segments with the probabilistic Hough transform, and the
    image is rotated about its centre by the median angle of the segments
    that pass the length/orientation filter. Areas uncovered by the rotation
    are filled with white.

    Args:
        image: Image array that ``cv2.cvtColor`` can convert with
            ``cv2.COLOR_BGR2GRAY``.

    Returns:
        The rotated image (same width and height as the input), or the input
        ``image`` object itself when no segments are detected or none pass
        the filter. A message is printed in both fallback cases.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Canny thresholds sit 33% below/above the median intensity, clamped to 0..255.
    median_intensity = np.median(gray)
    lower_threshold = int(max(0, (1.0 - 0.33) * median_intensity))
    upper_threshold = int(min(255, (1.0 + 0.33) * median_intensity))
    edges = cv2.Canny(gray, lower_threshold, upper_threshold)

    # Hough parameters scale with the largest image dimension.
    largest_dim = max(image.shape)
    min_line_length = max(50, int(0.1 * largest_dim))
    max_line_gap = max(10, int(0.05 * largest_dim))
    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 100,
                            minLineLength=min_line_length, maxLineGap=max_line_gap)

    if lines is None:
        print("No lines detected.")
        return image

    # Collect one angle per segment, keeping only long segments that are not
    # nearly vertical. The angle is computed inside the loop so each kept
    # segment contributes its own value.
    angles = []
    for line in lines:
        x1, y1, x2, y2 = line[0]
        # Work in floats so squaring large pixel deltas cannot overflow
        # a fixed-width integer type.
        dx = float(x2) - float(x1)
        dy = float(y2) - float(y1)
        length = np.hypot(dx, dy)
        angle = np.degrees(np.arctan2(dy, dx))
        if length > 100 and not (80 < abs(angle) < 100):
            # A segment has no direction: fold the angle into [-90, 90) so
            # that e.g. 178 and -2 degrees count as the same orientation.
            angles.append((angle + 90.0) % 180.0 - 90.0)

    if not angles:
        print("No valid angles found.")
        return image

    median_angle = float(np.median(angles))

    # Rotate about the image centre; keep the original canvas size.
    height, width = image.shape[:2]
    center = (width // 2, height // 2)
    rotation_matrix = cv2.getRotationMatrix2D(center, median_angle, 1.0)
    return cv2.warpAffine(image, rotation_matrix, (width, height),
                          borderMode=cv2.BORDER_CONSTANT, borderValue=(255, 255, 255))

def _read_color_image(image_path):
    """Load an image file as a colour array, or return None if it cannot be read.

    The file bytes are read with NumPy and decoded with ``cv2.imdecode``
    rather than ``cv2.imread``, because ``cv2.imread`` is known to fail on
    some platforms (notably Windows) when the path contains non-ASCII
    characters.
    """
    try:
        raw_bytes = np.fromfile(image_path, dtype=np.uint8)
    except OSError:
        # Missing file, directory, permission problem, ...
        return None
    if raw_bytes.size == 0:
        # Nothing to decode.
        return None
    return cv2.imdecode(raw_bytes, cv2.IMREAD_COLOR)


def process_image(image_path):
    """Deskew the image at ``image_path``, OCR it, and save the aligned copy.

    The image is aligned with ``auto_align_image``, converted to grayscale,
    passed to Tesseract, and the recognised text is printed. The aligned
    image is then written to ``aligned_image.jpg`` in the current directory.

    Args:
        image_path: Path of the image file to process.

    Returns:
        None. If the image cannot be loaded, an error is printed and the
        function returns early.
    """
    # Read the image
    image = _read_color_image(image_path)

    # Check if the image was successfully loaded
    if image is None:
        print(f"Error: Could not load image at {image_path}")
        return

    # Align the image
    aligned_image = auto_align_image(image)

    # Convert the image to grayscale
    gray = cv2.cvtColor(aligned_image, cv2.COLOR_BGR2GRAY)

    # Use Tesseract OCR to extract text from the image
    text = pytesseract.image_to_string(Image.fromarray(gray))

    # Print the extracted text
    print(f"Extracted text:\n{text}")

    # Save the aligned image. cv2.imwrite signals failure through its boolean
    # return value, so only report success when the file was actually written.
    output_path = "aligned_image.jpg"
    if cv2.imwrite(output_path, aligned_image):
        print(f"Aligned image saved as {output_path}")
    else:
        print(f"Error: Could not save image to {output_path}")

if __name__ == '__main__':
    import os

    # Specify the path to the image. os.path.join uses the platform's own
    # separator, so the same cell works on Windows and on POSIX systems
    # (a hard-coded backslash is only a separator on Windows).
    image_path = os.path.join("Лабораторные", "tree.png")  # Replace with the actual path to your image
    process_image(image_path)

Error: Could not load image at Лабораторные\tree.png
