In [1]:
pip install opencv-python numpy imutils


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.2.1 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
# Use the EAST model to draw boxes around detected text and save the annotated image to the "detected" directory (the paths are set in the last lines of this cell).

import cv2
import numpy as np
from imutils.object_detection import non_max_suppression

def decode_predictions(scores, geometry, min_confidence=0.5):
    """Convert EAST score/geometry maps into candidate boxes.

    Walks every cell of the score map and, for each cell whose score is not
    below ``min_confidence``, builds an axis-aligned rectangle from the five
    geometry channels of that cell.

    Args:
        scores: Array indexed as ``scores[0, 0, row, col]``.
        geometry: Array indexed as ``geometry[0, channel, row, col]``;
            channels 0-3 are used as box extents and channel 4 as an angle.
        min_confidence: Cells scoring below this value are ignored.

    Returns:
        Tuple ``(rectangles, confidences)``: a list of
        ``(start_x, start_y, end_x, end_y)`` integer tuples and the list of
        the matching scores, both in scan order (row by row).
    """
    row_count, col_count = scores.shape[2:4]
    found_boxes = []
    found_scores = []

    for row in range(row_count):
        row_scores = scores[0, 0, row]
        # Pull the five geometry channels for this row in one go.
        ext0, ext1, ext2, ext3, row_angles = (
            geometry[0, channel, row] for channel in range(5)
        )

        for col in range(col_count):
            score = row_scores[col]
            if score < min_confidence:
                continue

            theta = row_angles[col]
            cos_t = np.cos(theta)
            sin_t = np.sin(theta)

            # Channels 0+2 give the box height, channels 1+3 its width.
            box_h = ext0[col] + ext2[col]
            box_w = ext1[col] + ext3[col]

            # Each map cell is placed at 4x its index in the coordinate
            # system of the returned boxes.
            right = int(col * 4.0 + (cos_t * ext1[col]) + (sin_t * ext2[col]))
            bottom = int(row * 4.0 - (sin_t * ext1[col]) + (cos_t * ext2[col]))
            left = int(right - box_w)
            top = int(bottom - box_h)

            found_boxes.append((left, top, right, bottom))
            found_scores.append(score)

    return (found_boxes, found_scores)

def detect_text_east(image_path, model_path, output_path=None):
    """Detect text regions in an image with the EAST model and outline them.

    Args:
        image_path: Path of the image to analyse.
        model_path: Path of the EAST network file, loaded with
            ``cv2.dnn.readNet``.
        output_path: Where to save the annotated image. When falsy, the
            annotated image is shown in an OpenCV window instead.

    Returns:
        Tuple ``(boxes, annotated)``: ``boxes`` is a list of
        ``(startX, startY, endX, endY)`` tuples in original-image pixels and
        ``annotated`` is a copy of the input image with the boxes drawn.
        If anything fails, the error is printed and ``([], None)`` is
        returned so that callers unpacking the result do not crash.
    """
    try:
        # Load the image
        img = cv2.imread(image_path)
        if img is None:
            raise FileNotFoundError(f"Image at path {image_path} not found.")
        orig = img.copy()
        (H, W) = img.shape[:2]

        # The network runs on a fixed-size copy; keep the ratios needed to
        # map detections back onto the original image.
        newW, newH = (320, 320)
        rW = W / float(newW)
        rH = H / float(newH)

        # Resize the image
        img = cv2.resize(img, (newW, newH))

        # Load pre-trained EAST model
        net = cv2.dnn.readNet(model_path)

        # Prepare the image for EAST model
        blob = cv2.dnn.blobFromImage(img, 1.0, (newW, newH),
                                     (123.68, 116.78, 103.94), swapRB=True, crop=False)
        net.setInput(blob)

        # Get scores and geometry data from the model
        (scores, geometry) = net.forward(["feature_fusion/Conv_7/Sigmoid",
                                          "feature_fusion/concat_3"])

        # Decode the predictions to extract the bounding boxes
        rectangles, confidences = decode_predictions(scores, geometry, 0.5)

        # Apply non-maxima suppression to remove overlapping boxes
        boxes = non_max_suppression(np.array(rectangles), probs=confidences)

        # Scale bounding boxes back to original image size:
        # X coordinates use the width ratio, Y coordinates the height ratio.
        boxes = [(int(startX * rW), int(startY * rH), int(endX * rW), int(endY * rH))
                 for (startX, startY, endX, endY) in boxes]
        print("Detected text bounding box coordinates:")
        for box in boxes:
            print(f"Start (X, Y): ({box[0]}, {box[1]}), End (X, Y): ({box[2]}, {box[3]})")
        # Draw bounding boxes on the image
        for (startX, startY, endX, endY) in boxes:
            cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 255, 0), 2)

        # If an output path is provided, save the image, else show it
        if output_path:
            # cv2.imwrite reports failure through its return value.
            if cv2.imwrite(output_path, orig):
                print(f"Output saved to {output_path}")
            else:
                print(f"Failed to save output to {output_path}")
        else:
            cv2.imshow("Text Detection", orig)
            cv2.waitKey(0)
            cv2.destroyAllWindows()

        return boxes, orig
    except Exception as e:
        print(f"An error occurred: {e}")
        # Keep the result unpackable as (boxes, image) even on failure.
        return [], None

# Example usage:
# Change the paths below according to your system
image_path = r"C:\Users\archi\Downloads\processed_images\processed_images\51NIEOGNLSS.jpg"  # Path to your input image
model_path = r"C:\Users\archi\Downloads\frozen_east_text_detection\frozen_east_text_detection.pb"  # Path to the EAST model
output_path = r"C:\Users\archi\Desktop\detected\11111.jpg"  # Path to save the output (optional)

# Call the function to detect text and save the output image.
# Guard against a None result (the detector may return nothing on failure)
# so the tuple unpacking below cannot raise.
detection = detect_text_east(image_path, model_path, output_path)
boxes, image_with_boxes = detection if detection is not None else ([], None)


Detected text bounding box coordinates:
Start (X, Y): (518, 569), End (X, Y): (601, 604)
Start (X, Y): (435, 563), End (X, Y): (537, 604)
Start (X, Y): (432, 336), End (X, Y): (614, 384)
Start (X, Y): (473, 633), End (X, Y): (563, 668)
Start (X, Y): (470, 726), End (X, Y): (598, 800)
Start (X, Y): (499, 668), End (X, Y): (582, 704)
Output saved to C:\Users\archi\Desktop\detected\11111.jpg


In [2]:
# Crop the text regions boxed by EAST and save them to the "cropped_images" directory.
import cv2
import numpy as np
import os
from imutils.object_detection import non_max_suppression

def decode_predictions(scores, geometry, min_confidence=0.5):
    """Convert EAST score/geometry maps into candidate boxes.

    Walks every cell of the score map and, for each cell whose score is not
    below ``min_confidence``, builds an axis-aligned rectangle from the five
    geometry channels of that cell.

    Args:
        scores: Array indexed as ``scores[0, 0, row, col]``.
        geometry: Array indexed as ``geometry[0, channel, row, col]``;
            channels 0-3 are used as box extents and channel 4 as an angle.
        min_confidence: Cells scoring below this value are ignored.

    Returns:
        Tuple ``(rectangles, confidences)``: a list of
        ``(start_x, start_y, end_x, end_y)`` integer tuples and the list of
        the matching scores, both in scan order (row by row).
    """
    row_count, col_count = scores.shape[2:4]
    found_boxes = []
    found_scores = []

    for row in range(row_count):
        row_scores = scores[0, 0, row]
        # Pull the five geometry channels for this row in one go.
        ext0, ext1, ext2, ext3, row_angles = (
            geometry[0, channel, row] for channel in range(5)
        )

        for col in range(col_count):
            score = row_scores[col]
            if score < min_confidence:
                continue

            theta = row_angles[col]
            cos_t = np.cos(theta)
            sin_t = np.sin(theta)

            # Channels 0+2 give the box height, channels 1+3 its width.
            box_h = ext0[col] + ext2[col]
            box_w = ext1[col] + ext3[col]

            # Each map cell is placed at 4x its index in the coordinate
            # system of the returned boxes.
            right = int(col * 4.0 + (cos_t * ext1[col]) + (sin_t * ext2[col]))
            bottom = int(row * 4.0 - (sin_t * ext1[col]) + (cos_t * ext2[col]))
            left = int(right - box_w)
            top = int(bottom - box_h)

            found_boxes.append((left, top, right, bottom))
            found_scores.append(score)

    return (found_boxes, found_scores)

def detect_and_save_cropped_text(image_path, model_path, output_folder):
    """Detect text regions with EAST and save each one as a cropped image.

    Crops are written to ``output_folder`` as ``<image base name>_<n>.jpg``,
    where ``n`` is the 1-based index of the detected box.

    Args:
        image_path: Path of the image to analyse.
        model_path: Path of the EAST network file, loaded with
            ``cv2.dnn.readNet``.
        output_folder: Directory for the cropped images; created if missing.

    Returns:
        Tuple ``(boxes, orig)``: ``boxes`` is a list of
        ``(startX, startY, endX, endY)`` tuples in original-image pixels,
        clamped to the image bounds, and ``orig`` is an unmodified copy of
        the input image. If anything fails, the error is printed and
        ``([], None)`` is returned so that callers unpacking the result do
        not crash.
    """
    try:
        # Load the image
        img = cv2.imread(image_path)
        if img is None:
            raise FileNotFoundError(f"Image at path {image_path} not found.")
        orig = img.copy()
        (H, W) = img.shape[:2]

        # The network runs on a fixed-size copy; keep the ratios needed to
        # map detections back onto the original image.
        newW, newH = (320, 320)
        rW = W / float(newW)
        rH = H / float(newH)

        # Resize the image
        img = cv2.resize(img, (newW, newH))

        # Load pre-trained EAST model
        net = cv2.dnn.readNet(model_path)

        # Prepare the image for EAST model
        blob = cv2.dnn.blobFromImage(img, 1.0, (newW, newH),
                                     (123.68, 116.78, 103.94), swapRB=True, crop=False)
        net.setInput(blob)

        # Get scores and geometry data from the model
        (scores, geometry) = net.forward(["feature_fusion/Conv_7/Sigmoid",
                                          "feature_fusion/concat_3"])

        # Decode the predictions to extract the bounding boxes
        rectangles, confidences = decode_predictions(scores, geometry, 0.5)

        # Apply non-maxima suppression to remove overlapping boxes
        boxes = non_max_suppression(np.array(rectangles), probs=confidences)

        # Scale bounding boxes back to original image size (X uses the width
        # ratio, Y the height ratio) and clamp them to the image: a negative
        # start index would otherwise be read as "count from the end" when
        # slicing and produce a wrong or empty crop.
        boxes = [(min(max(int(startX * rW), 0), W),
                  min(max(int(startY * rH), 0), H),
                  min(max(int(endX * rW), 0), W),
                  min(max(int(endY * rH), 0), H))
                 for (startX, startY, endX, endY) in boxes]

        # Extract the base name of the image (without extension)
        base_name = os.path.splitext(os.path.basename(image_path))[0]

        # Create the output folder if it doesn't exist
        os.makedirs(output_folder, exist_ok=True)

        print("Detected text bounding box coordinates:")
        for idx, (startX, startY, endX, endY) in enumerate(boxes, start=1):
            print(f"Start (X, Y): ({startX}, {startY}), End (X, Y): ({endX}, {endY})")

            # Crop the detected text region from the original image
            cropped_img = orig[startY:endY, startX:endX]
            if cropped_img.size == 0:
                # cv2.imwrite raises on an empty image; skip this box
                # instead of aborting the remaining ones.
                print(f"Skipping empty region {idx}")
                continue

            # Save the cropped image with incremental names
            cropped_image_path = os.path.join(output_folder, f"{base_name}_{idx}.jpg")
            # cv2.imwrite reports failure through its return value.
            if cv2.imwrite(cropped_image_path, cropped_img):
                print(f"Cropped image saved at: {cropped_image_path}")
            else:
                print(f"Failed to save cropped image at: {cropped_image_path}")

        return boxes, orig
    except Exception as e:
        print(f"An error occurred: {e}")
        # Keep the result unpackable as (boxes, image) even on failure.
        return [], None

# Example usage:
# Change the paths below according to your system
image_path = r"C:\Users\archi\Downloads\processed_images\processed_images\51NIEOGNLSS.jpg"  # Path to your input image
model_path = r"C:\Users\archi\Downloads\frozen_east_text_detection\frozen_east_text_detection.pb"  # Path to the EAST model
output_folder = r"C:\Users\archi\Desktop\cropped_images"  # Folder to save the cropped images

# Call the function to detect text and save the cropped text images.
# Guard against a None result (the detector may return nothing on failure)
# so the tuple unpacking below cannot raise.
detection = detect_and_save_cropped_text(image_path, model_path, output_folder)
boxes, image_with_boxes = detection if detection is not None else ([], None)


Detected text bounding box coordinates:
Start (X, Y): (518, 569), End (X, Y): (601, 604)
Cropped image saved at: C:\Users\archi\Desktop\cropped_images\51NIEOGNLSS_1.jpg
Start (X, Y): (435, 563), End (X, Y): (537, 604)
Cropped image saved at: C:\Users\archi\Desktop\cropped_images\51NIEOGNLSS_2.jpg
Start (X, Y): (432, 336), End (X, Y): (614, 384)
Cropped image saved at: C:\Users\archi\Desktop\cropped_images\51NIEOGNLSS_3.jpg
Start (X, Y): (473, 633), End (X, Y): (563, 668)
Cropped image saved at: C:\Users\archi\Desktop\cropped_images\51NIEOGNLSS_4.jpg
Start (X, Y): (470, 726), End (X, Y): (598, 800)
Cropped image saved at: C:\Users\archi\Desktop\cropped_images\51NIEOGNLSS_5.jpg
Start (X, Y): (499, 668), End (X, Y): (582, 704)
Cropped image saved at: C:\Users\archi\Desktop\cropped_images\51NIEOGNLSS_6.jpg


In [2]:
pip install paddlepaddle paddleocr


Note: you may need to restart the kernel to use updated packages.


In [3]:
# Use PaddleOCR to extract the text from the regions boxed by EAST.

import os
import cv2
import numpy as np
from paddleocr import PaddleOCR
from imutils.object_detection import non_max_suppression

# EAST Text Detection
def decode_predictions(scores, geometry, min_confidence=0.5):
    """Convert EAST score/geometry maps into candidate boxes.

    Walks every cell of the score map and, for each cell whose score is not
    below ``min_confidence``, builds an axis-aligned rectangle from the five
    geometry channels of that cell.

    Args:
        scores: Array indexed as ``scores[0, 0, row, col]``.
        geometry: Array indexed as ``geometry[0, channel, row, col]``;
            channels 0-3 are used as box extents and channel 4 as an angle.
        min_confidence: Cells scoring below this value are ignored.

    Returns:
        Tuple ``(rectangles, confidences)``: a list of
        ``(start_x, start_y, end_x, end_y)`` integer tuples and the list of
        the matching scores, both in scan order (row by row).
    """
    row_count, col_count = scores.shape[2:4]
    found_boxes = []
    found_scores = []

    for row in range(row_count):
        row_scores = scores[0, 0, row]
        # Pull the five geometry channels for this row in one go.
        ext0, ext1, ext2, ext3, row_angles = (
            geometry[0, channel, row] for channel in range(5)
        )

        for col in range(col_count):
            score = row_scores[col]
            if score < min_confidence:
                continue

            theta = row_angles[col]
            cos_t = np.cos(theta)
            sin_t = np.sin(theta)

            # Channels 0+2 give the box height, channels 1+3 its width.
            box_h = ext0[col] + ext2[col]
            box_w = ext1[col] + ext3[col]

            # Each map cell is placed at 4x its index in the coordinate
            # system of the returned boxes.
            right = int(col * 4.0 + (cos_t * ext1[col]) + (sin_t * ext2[col]))
            bottom = int(row * 4.0 - (sin_t * ext1[col]) + (cos_t * ext2[col]))
            left = int(right - box_w)
            top = int(bottom - box_h)

            found_boxes.append((left, top, right, bottom))
            found_scores.append(score)

    return (found_boxes, found_scores)

# Function to detect text regions using EAST and pass them to OCR
def detect_and_extract_text(image_path, east_model_path):
    """Detect text regions with EAST and print the text PaddleOCR reads in each.

    Every detected region is cropped from the original image, written to
    ``cropped_image_<i>.jpg`` in the current working directory, and that
    file is passed to PaddleOCR. Results are printed, nothing is returned.

    Args:
        image_path: Path of the image to analyse.
        east_model_path: Path of the EAST network file, loaded with
            ``cv2.dnn.readNet``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot load ``image_path``.
    """
    # Load the image
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"Image at path {image_path} not found.")
    orig = img.copy()
    (H, W) = img.shape[:2]

    # The network runs on a fixed-size copy; keep the ratios needed to map
    # detections back onto the original image.
    newW, newH = (320, 320)
    rW = W / float(newW)
    rH = H / float(newH)

    # Resize the image for EAST
    img = cv2.resize(img, (newW, newH))

    # Load the EAST model
    net = cv2.dnn.readNet(east_model_path)

    # Prepare the image for the EAST model
    blob = cv2.dnn.blobFromImage(img, 1.0, (newW, newH),
                                 (123.68, 116.78, 103.94), swapRB=True, crop=False)
    net.setInput(blob)

    # Get scores and geometry data from the model
    (scores, geometry) = net.forward(["feature_fusion/Conv_7/Sigmoid",
                                      "feature_fusion/concat_3"])

    # Decode the predictions to extract the bounding boxes
    rectangles, confidences = decode_predictions(scores, geometry, 0.5)

    # Apply non-maxima suppression to remove overlapping boxes
    boxes = non_max_suppression(np.array(rectangles), probs=confidences)

    # Scale bounding boxes back to original image size (X uses the width
    # ratio, Y the height ratio) and clamp them to the image: a negative
    # start index would otherwise be read as "count from the end" when
    # slicing and produce a wrong or empty crop.
    boxes = [(min(max(int(startX * rW), 0), W),
              min(max(int(startY * rH), 0), H),
              min(max(int(endX * rW), 0), W),
              min(max(int(endY * rH), 0), H))
             for (startX, startY, endX, endY) in boxes]

    # Initialize the OCR model (PaddleOCR)
    ocr_model = PaddleOCR(use_angle_cls=True, lang='en')  # English language model

    # Iterate over the detected text regions
    for i, (startX, startY, endX, endY) in enumerate(boxes):
        # Crop the image using the detected coordinates
        cropped_image = orig[startY:endY, startX:endX]
        if cropped_image.size == 0:
            # Nothing to write or recognise; cv2.imwrite would raise on an
            # empty image.
            print(f"No text detected in region {i + 1}")
            continue

        # Save the cropped image; the OCR call below reads it from this file
        cropped_image_path = f"cropped_image_{i}.jpg"
        cv2.imwrite(cropped_image_path, cropped_image)

        # Use OCR on the cropped image to extract text
        result = ocr_model.ocr(cropped_image_path, cls=True)

        # Extract the recognized text
        if result and result[0]:
            extracted_text = [line[1][0] for line in result[0]]
            print(f"Extracted text from region {i + 1}: {''.join(extracted_text)}")
        else:
            print(f"No text detected in region {i + 1}")

# Example usage:
# Both paths are absolute Windows paths from the author's machine; change
# them to match your system before running this cell.
image_path = r'C:\Users\archi\Downloads\processed_images\processed_images\51NIEOGNLSS.jpg'  # Path to your input image
east_model_path = r'C:\Users\archi\Downloads\frozen_east_text_detection\frozen_east_text_detection.pb'  # Path to the EAST model

# Call the function to detect text and extract the text from detected regions
# (results are printed; the call returns nothing).
detect_and_extract_text(image_path, east_model_path)


[2024/11/29 22:58:21] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='C:\\Users\\archi/.paddleocr/whl\\det\\en\\en_PP-OCRv3_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='C:\\Users\\archi/.paddleocr/whl\\rec\\en\\en_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=

In [None]:
# Final version: extract the text and save the results to a CSV file.

import csv
import cv2
import numpy as np
import re
from paddleocr import PaddleOCR
from imutils.object_detection import non_max_suppression

# EAST Text Detection
def decode_predictions(scores, geometry, min_confidence=0.5):
    """Convert EAST score/geometry maps into candidate boxes.

    Walks every cell of the score map and, for each cell whose score is not
    below ``min_confidence``, builds an axis-aligned rectangle from the five
    geometry channels of that cell.

    Args:
        scores: Array indexed as ``scores[0, 0, row, col]``.
        geometry: Array indexed as ``geometry[0, channel, row, col]``;
            channels 0-3 are used as box extents and channel 4 as an angle.
        min_confidence: Cells scoring below this value are ignored.

    Returns:
        Tuple ``(rectangles, confidences)``: a list of
        ``(start_x, start_y, end_x, end_y)`` integer tuples and the list of
        the matching scores, both in scan order (row by row).
    """
    row_count, col_count = scores.shape[2:4]
    found_boxes = []
    found_scores = []

    for row in range(row_count):
        row_scores = scores[0, 0, row]
        # Pull the five geometry channels for this row in one go.
        ext0, ext1, ext2, ext3, row_angles = (
            geometry[0, channel, row] for channel in range(5)
        )

        for col in range(col_count):
            score = row_scores[col]
            if score < min_confidence:
                continue

            theta = row_angles[col]
            cos_t = np.cos(theta)
            sin_t = np.sin(theta)

            # Channels 0+2 give the box height, channels 1+3 its width.
            box_h = ext0[col] + ext2[col]
            box_w = ext1[col] + ext3[col]

            # Each map cell is placed at 4x its index in the coordinate
            # system of the returned boxes.
            right = int(col * 4.0 + (cos_t * ext1[col]) + (sin_t * ext2[col]))
            bottom = int(row * 4.0 - (sin_t * ext1[col]) + (cos_t * ext2[col]))
            left = int(right - box_w)
            top = int(bottom - box_h)

            found_boxes.append((left, top, right, bottom))
            found_scores.append(score)

    return (found_boxes, found_scores)

# Function to detect text regions using EAST and pass them to OCR
def detect_and_extract_text(image_path, east_model_path, csv_writer):
    """Find the first quantity-like value (e.g. "500ml") in an image and log it.

    Text regions are detected with EAST, each region is read with PaddleOCR,
    and the recognised text is searched for a number followed by a unit.
    Exactly one row ``[image_path, matched_text]`` is written to
    ``csv_writer``; ``matched_text`` is ``'null'`` when no region matches.

    Args:
        image_path: Path of the image to analyse.
        east_model_path: Path of the EAST network file, loaded with
            ``cv2.dnn.readNet``.
        csv_writer: Object with a ``writerow`` method (e.g. ``csv.writer``).

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot load ``image_path``.
    """
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"Image at path {image_path} not found.")
    orig = img.copy()
    (H, W) = img.shape[:2]

    # The network runs on a fixed-size copy; keep the ratios needed to map
    # detections back onto the original image.
    newW, newH = (320, 320)
    rW = W / float(newW)
    rH = H / float(newH)

    img = cv2.resize(img, (newW, newH))

    net = cv2.dnn.readNet(east_model_path)

    blob = cv2.dnn.blobFromImage(img, 1.0, (newW, newH),
                                 (123.68, 116.78, 103.94), swapRB=True, crop=False)
    net.setInput(blob)

    (scores, geometry) = net.forward(["feature_fusion/Conv_7/Sigmoid",
                                      "feature_fusion/concat_3"])

    rectangles, confidences = decode_predictions(scores, geometry, 0.5)

    boxes = non_max_suppression(np.array(rectangles), probs=confidences)

    # Scale bounding boxes back to original image size (X uses the width
    # ratio, Y the height ratio) and clamp them to the image: a negative
    # start index would otherwise be read as "count from the end" when
    # slicing and produce a wrong or empty crop.
    boxes = [(min(max(int(startX * rW), 0), W),
              min(max(int(startY * rH), 0), H),
              min(max(int(endX * rW), 0), W),
              min(max(int(endY * rH), 0), H))
             for (startX, startY, endX, endY) in boxes]

    ocr_model = PaddleOCR(use_angle_cls=True, lang='en')

    # Regex pattern for matching entity values
    pattern = re.compile(r'\b\d+(\.\d+)?(g|G|ml|v|kg|oz|mg|Mg|MG|L|cm|W)\b', re.IGNORECASE)

    matched_text = 'null'  # Default value if no match found

    for (startX, startY, endX, endY) in boxes:
        cropped_image = orig[startY:endY, startX:endX]
        if cropped_image.size == 0:
            # Degenerate box: there are no pixels to hand to the OCR model.
            continue

        result = ocr_model.ocr(cropped_image, cls=True)

        if result and result[0]:
            extracted_text = [line[1][0] for line in result[0]]
            # Lines are joined without a separator, so a number and a unit
            # recognised as separate lines still form one token.
            text = ''.join(extracted_text)
            # Search for matches in the extracted text
            match = pattern.search(text)
            if match:
                matched_text = match.group()
                break  # Stop after finding the first match

    # Write the matched text or 'null' to CSV
    csv_writer.writerow([image_path, matched_text])

# Function to process a list of images and save extracted text to CSV
def process_images(image_paths, east_model_path, csv_filename):
    """Run text extraction on every image and append the results to a CSV.

    Args:
        image_paths: Iterable of image file paths to process, in order.
        east_model_path: Path of the EAST model, forwarded unchanged to
            ``detect_and_extract_text``.
        csv_filename: CSV file to append to; a header row is written first
            when the file is empty.
    """
    # Append mode keeps the rows written by earlier runs instead of
    # overwriting them.
    with open(csv_filename, mode='a', newline='', encoding='utf-8') as out_file:
        writer = csv.writer(out_file)

        # A position of 0 right after opening means the file has no content
        # yet, so this is the one time the header is needed.
        needs_header = out_file.tell() == 0
        if needs_header:
            writer.writerow(['Image Path', 'Extracted Text'])

        # One call per image; the callee writes its own row.
        for path in image_paths:
            detect_and_extract_text(path, east_model_path, writer)

# Example usage:
# All paths below are absolute Windows paths from the author's machine;
# change them to match your system before running this cell.
image_paths = [
    r'C:\Users\archi\Downloads\processed_images\processed_images\81CAc3ok29L.jpg',
    # Add more image paths here
]
east_model_path = r'C:\Users\archi\Downloads\frozen_east_text_detection\frozen_east_text_detection.pb'
csv_filename = r'C:\Users\archi\Desktop\extracted_text.csv'

# The CSV is opened in append mode, so re-running this cell adds new rows
# to the existing file rather than replacing it.
process_images(image_paths, east_model_path, csv_filename)


[2024/11/29 23:24:40] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='C:\\Users\\archi/.paddleocr/whl\\det\\en\\en_PP-OCRv3_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='C:\\Users\\archi/.paddleocr/whl\\rec\\en\\en_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=