
# Extracción de Texto desde Imágenes con OCR
Este cuaderno utiliza `OpenCV` y `Pytesseract` para extraer texto de imágenes.
Incluye pasos detallados para cargar imágenes, preprocesarlas, aplicar técnicas de 
detección de contornos y extraer texto mediante OCR.


In [2]:
# Import required packages
import cv2
import pytesseract
import matplotlib.pyplot as plt

In [17]:
# Point pytesseract at the Tesseract-OCR executable.
# The default Windows install location is applied only when that file actually
# exists; otherwise pytesseract keeps its own default command ("tesseract",
# resolved through PATH), so the notebook also runs on Linux/macOS and on
# Windows machines where Tesseract lives elsewhere but is on PATH.
import os

tesseract_windows_path = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
if os.path.isfile(tesseract_windows_path):
    pytesseract.pytesseract.tesseract_cmd = tesseract_windows_path

In [14]:

# Read the image from which text needs to be extracted
image_path = "data/text_images/imagen1.jpeg"
img = cv2.imread(image_path)

# cv2.imread does not raise when the file is missing or cannot be decoded; it
# returns None, which would otherwise only surface as an opaque cvtColor
# error on the next line. Fail early with a message that names the path.
if img is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Convert the image to grayscale (imread yields BGR channel order)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)


In [15]:

# Binarize the grayscale image: inverted binary output, with the threshold
# level picked by Otsu's method (the 0 passed as the threshold is ignored
# when the OTSU flag is set).
threshold_mode = cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU
ret, thresh1 = cv2.threshold(gray, 0, 255, threshold_mode)

# Build the 25x25 rectangular structuring element used for dilation
kernel_size = (25, 25)
rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)

# Dilate the thresholded image once with that kernel
# (presumably so neighbouring characters merge into text-block blobs)
dilation = cv2.dilate(thresh1, rect_kernel, iterations=1)


In [18]:
# Find the external contours of the dilated image; each contour's bounding
# box is treated as one region to run OCR on.
contours, hierarchy = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

# Copy of the grayscale image that is used only for drawing the detected boxes
im2 = gray.copy()

cnt_list = []
for cnt in contours:
    x, y, w, h = cv2.boundingRect(cnt)
    # Crop from the untouched grayscale image, NOT from im2: the 5 px thick
    # rectangles drawn on im2 straddle the box edges (and may overlap
    # neighbouring boxes), so cropping from im2 would feed those drawn
    # borders to the OCR engine as if they were part of the text.
    cropped = gray[y:y + h, x:x + w]
    # Outline the region on the annotated copy
    rect = cv2.rectangle(im2, (x, y), (x + w, y + h), (0, 255, 0), 5)
    # Apply OCR on the clean crop
    text = pytesseract.image_to_string(cropped)
    cnt_list.append([x, y, text])

# Sort text blocks by vertical position (y-coordinate) so they come out
# top-to-bottom; each entry is [x, y, text].
sorted_list = sorted(cnt_list, key=lambda entry: entry[1])


In [19]:

# Write the recognized text to a file, one OCR block after another in the
# order of sorted_list.
import os

# Make sure the output directory exists so open() does not fail on a fresh checkout
os.makedirs("report", exist_ok=True)

# Open the file once in "w" mode: this truncates any previous content and
# avoids reopening the file in append mode for every block. UTF-8 is set
# explicitly because OCR output can contain characters that the platform's
# default encoding cannot represent.
with open("report/recognized.txt", "w", encoding="utf-8") as file:
    for x, y, text in sorted_list:
        file.write(text + "\n")


In [20]:

# Resize images for display (40% of the original size in both directions).
# The scaled-down dilation gets its own name so the full-resolution `dilation`
# is not overwritten; re-running this cell or the contour cell afterwards then
# still works on the original-size mask.
rgb_image = cv2.resize(im2, (0, 0), fx=0.4, fy=0.4)
dilation_small = cv2.resize(dilation, (0, 0), fx=0.4, fy=0.4)

# Display the processed images: the dilated mask and the resized grayscale
# copy carrying the detected-region rectangles (previously the resized image
# was computed but the full-size, unannotated `gray` was shown instead).
cv2.imshow('Dilation', dilation_small)
cv2.imshow('Grayscale', rgb_image)

# Block until a key is pressed in one of the windows, then close them all
cv2.waitKey(0)
cv2.destroyAllWindows()
