In [1]:
import cv2
import pytesseract
import numpy as np

# Load the EAST model
net = cv2.dnn.readNet(r"C:\Users\Naga Hari Priyan\Desktop\CV_K\CPU_EAST\frozen_east_text_detection.pb")

# Set Tesseract executable path
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files (x86)\Tesseract-OCR\tesseract.exe'

# Define a function to decode predictions from the EAST model
def decode_predictions(scores, geometry):
    (numRows, numCols) = scores.shape[2:4]
    rects = []
    confidences = []

    for y in range(0, numRows):
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]

        for x in range(0, numCols):
            if scoresData[x] < 0.5:
                continue

            (offsetX, offsetY) = (x * 4.0, y * 4.0)
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)

            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]

            endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
            endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            startX = int(endX - w)
            startY = int(endY - h)

            rects.append((startX, startY, endX, endY))
            confidences.append(scoresData[x])

    return (rects, confidences)

def process_frame(frame, net):
    orig = frame.copy()
    (H, W) = frame.shape[:2]

    newW, newH = (320, 320)
    rW = W / float(newW)
    rH = H / float(newH)
    frame = cv2.resize(frame, (newW, newH))
    (H, W) = frame.shape[:2]

    # Prepare the frame for the EAST model
    blob = cv2.dnn.blobFromImage(frame, 1.0, (W, H), (123.68, 116.78, 103.94), swapRB=True, crop=False)
    net.setInput(blob)
    (scores, geometry) = net.forward(["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"])

    # Decode the predictions
    (rects, confidences) = decode_predictions(scores, geometry)
    boxes = cv2.dnn.NMSBoxes(rects, confidences, 0.5, 0.4)

    # Check the format and content of boxes
    print("Boxes:", boxes)
    
    if len(boxes) == 0:
        print("No bounding boxes detected.")
    else:
        # Convert boxes to flat array if necessary
        boxes = boxes.flatten() if isinstance(boxes, np.ndarray) else boxes
        results = []
        for i in boxes:
            index = i[0] if isinstance(i, (list, tuple)) else i
            if index < len(rects):
                (startX, startY, endX, endY) = rects[index]
                
                startX = int(startX * rW)
                startY = int(startY * rH)
                endX = int(endX * rW)
                endY = int(endY * rH)

                roi = orig[startY:endY, startX:endX]
                config = ("-l eng --oem 1 --psm 7")
                text = pytesseract.image_to_string(roi, config=config)

                results.append(((startX, startY, endX, endY), text))

        # Display the results
        for ((startX, startY, endX, endY), text) in results:
            print(f"OCR Text: {text}")
            cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 255, 0), 2)
            cv2.putText(orig, text, (startX, startY - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    return orig


# Open the video file
video = cv2.VideoCapture(r"v1.mp4")

# Check if video opened successfully
if not video.isOpened():
    print("Error: Could not open video.")
    exit()

# Loop over the frames from the video
while True:
    ret, frame = video.read()
    if not ret:
        print("Error: Could not read frame.")
        break
    
    # Process each frame to detect and extract text
    processed_frame = process_frame(frame, net)

    # Display the processed frame
    cv2.imshow("Text Detection", processed_frame)

    # Wait for key press and exit on 'q'
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the video capture object and close all OpenCV windows
video.release()
cv2.destroyAllWindows()


Boxes: [47 68 42]
OCR Text: UHAVEI


OCR Text: UNLY


OCR Text: YUUH


Boxes: [47 68 78]
OCR Text: UHAVEI


OCR Text: UNLY


OCR Text: YOU H1


Boxes: [46 67 77]
OCR Text: UHAVEI


OCR Text: UNLY


OCR Text: YOU H1


Boxes: [46 67 41]
OCR Text: JHAVEI


OCR Text: UNLY


OCR Text: YEIUH


Boxes: [44 65 38]
OCR Text: IHAVEI


OCR Text: UNLY


OCR Text: YOU}


Boxes: [44 65 38]
OCR Text: IHAVEI


OCR Text: UNLY


OCR Text: YOU}


Boxes: [48 65 38]
OCR Text: {HAVE


OCR Text: UNLY


OCR Text: YOU}


Boxes: [48 64 38]
OCR Text: {HAVE


OCR Text: UNLY


OCR Text: YOU}


Boxes: [48 64 38]
OCR Text: {HAVE


OCR Text: UNLY


OCR Text: YOU}


Boxes: [48 64 38]
OCR Text: {HAVE


OCR Text: UNLY


OCR Text: YUUI


Boxes: [48 38 64]
OCR Text: {HAVE


OCR Text: YEIUI


OCR Text: UNLY


Boxes: [48 38 64]
OCR Text: ‘HAVE


OCR Text: YEIUI


OCR Text: UNLY


Boxes: [48 38 64]
OCR Text: {HAVE


OCR Text: YEIUI


OCR Text: UNLY


Boxes: [48 38 64]
OCR Text: ‘HAVE


OCR Text: YOU I


OCR Text: UNLY


Boxes

In [2]:
import cv2
import pytesseract
import numpy as np
import threading
import queue

# Load the EAST model
net = cv2.dnn.readNet(r"C:\Users\Naga Hari Priyan\Desktop\CV_K\CPU_EAST\frozen_east_text_detection.pb")

# Set Tesseract executable path
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files (x86)\Tesseract-OCR\tesseract.exe'

# Define a function to decode predictions from the EAST model
def decode_predictions(scores, geometry):
    (numRows, numCols) = scores.shape[2:4]
    rects = []
    confidences = []

    for y in range(numRows):
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]

        for x in range(numCols):
            if scoresData[x] < 0.5:
                continue

            (offsetX, offsetY) = (x * 4.0, y * 4.0)
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)

            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]

            endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
            endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            startX = int(endX - w)
            startY = int(endY - h)

            rects.append((startX, startY, endX, endY))
            confidences.append(float(scoresData[x]))  # Ensure confidences are floats

    return (rects, confidences)

def process_frame(frame, net):
    orig = frame.copy()
    (H, W) = frame.shape[:2]

    newW, newH = (320, 320)
    rW = W / float(newW)
    rH = H / float(newH)
    frame = cv2.resize(frame, (newW, newH))
    (H, W) = frame.shape[:2]

    # Prepare the frame for the EAST model
    blob = cv2.dnn.blobFromImage(frame, 1.0, (W, H), (123.68, 116.78, 103.94), swapRB=True, crop=False)
    net.setInput(blob)
    (scores, geometry) = net.forward(["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"])

    # Print shapes
    print(f"Blob shape: {blob.shape}")
    print(f"Scores shape: {scores.shape}")
    print(f"Geometry shape: {geometry.shape}")

    # Decode the predictions
    (rects, confidences) = decode_predictions(scores, geometry)
    
    if not rects:
        print("No text detected")
        return orig  # No text detected

    # Debug prints
    print(f"Detected rectangles: {rects}")
    print(f"Detected confidences: {confidences}")

    boxes = cv2.dnn.NMSBoxes(rects, confidences, 0.5, 0.4)

    # Debug prints
    print(f"Boxes after NMS: {boxes}")

    if boxes is None or len(boxes) == 0:
        print("No valid boxes found")
        return orig  # No valid boxes found

    # Perform OCR on the detected text regions
    results = []
    for i in range(len(boxes)):
        box = boxes[i]
        if isinstance(box, np.ndarray) and box.ndim == 2:
            box = box[0]
        elif isinstance(box, np.ndarray) and box.ndim == 1:
            box = np.expand_dims(box, axis=0)
        else:
            continue  # Skip invalid boxes

        (startX, startY, endX, endY) = rects[box[0]]

        startX = int(startX * rW)
        startY = int(startY * rH)
        endX = int(endX * rW)
        endY = int(endY * rH)

        roi = orig[startY:endY, startX:endX]
        print(f"ROI shape: {roi.shape}")
        config = ("-l eng --oem 1 --psm 7")
        text = pytesseract.image_to_string(roi, config=config)
        print(f"Detected Text: {text}")

        results.append(((startX, startY, endX, endY), text))

    # Display the results
    for ((startX, startY, endX, endY), text) in results:
        cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 255, 0), 2)
        cv2.putText(orig, text, (startX, startY - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    return orig



# Function to read frames from the video
def frame_reader(video, frame_queue):
    while True:
        ret, frame = video.read()
        if not ret:
            frame_queue.put(None)  # Sentinel value to signal end of video
            break
        frame_queue.put(frame)

# Function to process frames from the queue
def frame_processor(frame_queue, net):
    while True:
        frame = frame_queue.get()
        if frame is None:
            break
        try:
            processed_frame = process_frame(frame, net)
            cv2.imshow("Text Detection", processed_frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        except Exception as e:
            print(f"Error processing frame: {e}")

# Initialize video capture and queue
video = cv2.VideoCapture(r"v1.mp4")
frame_queue = queue.Queue(maxsize=10)

if not video.isOpened():
    print("Error: Could not open video.")
    exit()

# Start threads
reader_thread = threading.Thread(target=frame_reader, args=(video, frame_queue))
processor_thread = threading.Thread(target=frame_processor, args=(frame_queue, net))

reader_thread.start()
processor_thread.start()

reader_thread.join()
processor_thread.join()

video.release()
cv2.destroyAllWindows()


Blob shape: (1, 3, 320, 320)
Scores shape: (1, 1, 80, 80)
Geometry shape: (1, 5, 80, 80)
Detected rectangles: [(60, 147, 149, 173), (62, 146, 145, 171), (62, 148, 143, 172), (63, 147, 141, 171), (63, 147, 140, 171), (64, 147, 140, 170), (65, 147, 140, 170), (64, 148, 139, 171), (62, 147, 141, 170), (97, 146, 193, 169), (102, 146, 194, 168), (102, 146, 195, 169), (103, 146, 195, 169), (104, 146, 193, 169), (106, 146, 193, 169), (104, 147, 192, 170), (103, 147, 192, 170), (103, 147, 192, 171), (102, 147, 192, 171), (101, 147, 191, 172), (97, 147, 189, 172), (96, 147, 188, 172), (98, 147, 187, 173), (186, 147, 259, 170), (186, 146, 259, 169), (187, 146, 257, 169), (188, 147, 255, 170), (188, 148, 254, 170), (189, 147, 254, 170), (189, 147, 254, 171), (189, 146, 254, 171), (187, 147, 254, 172), (187, 147, 253, 173), (188, 147, 253, 174), (61, 147, 150, 173), (61, 146, 147, 172), (61, 147, 148, 171), (62, 147, 145, 171), (63, 147, 143, 171), (63, 147, 142, 170), (63, 147, 141, 170), (62, 14