In [None]:
!pip install easyocr jiwer nltk

In [None]:
import cv2
import easyocr
import pyttsx3
import threading
import queue
import time
from jiwer import wer, cer
from nltk.translate.bleu_score import sentence_bleu

In [None]:
# Init Text-to-Speech.
# No driver name is passed, so pyttsx3 picks the default driver for the
# current platform (SAPI5 on Windows) instead of failing on other systems.
TTS_RATE_WPM = 150   # speech rate handed to the engine's 'rate' property
TTS_VOLUME = 1.0     # full volume; the 'volume' property takes a float

engine = pyttsx3.init()
engine.setProperty('rate', TTS_RATE_WPM)
engine.setProperty('volume', TTS_VOLUME)

# Queue to hold texts to speak; the TTS worker consumes it in FIFO order.
speech_queue = queue.Queue()

# Store metrics
ocr_metrics_list = []     # one dict per newly detected text: text, CER, WER, BLEU
tts_metrics_list = []     # one dict per spoken text: text, response_time
detected_text_list = []   # unique detected texts, in order of first appearance

def tts_worker():
    """Speak queued texts one at a time and record how long each one takes.

    Blocks on ``speech_queue`` until an item arrives. A ``None`` item is the
    stop signal and ends the loop. For every other item the text is spoken
    with ``engine`` and a ``{"text", "response_time"}`` record is appended to
    ``tts_metrics_list``.

    Every item taken from the queue, including the stop signal and items
    whose playback failed, is marked done so ``speech_queue.join()`` cannot
    hang. A failure on one text is reported and skipped rather than
    terminating the worker.
    """
    while True:
        text = speech_queue.get()
        try:
            if text is None:  # Signal to stop the thread
                break

            # --- Measure TTS Response Time ---
            # perf_counter is monotonic, so the interval is not distorted by
            # system clock adjustments the way time.time() can be.
            start_time = time.perf_counter()
            engine.say(text)
            engine.runAndWait()
            response_time = time.perf_counter() - start_time

            tts_metrics_list.append({
                "text": text,
                "response_time": response_time
            })

            print(f"TTS Response Time for '{text}': {response_time:.2f}s")
        except Exception as exc:
            # Keep the worker alive so later texts are still spoken.
            print(f"TTS failed for '{text}': {exc}")
        finally:
            # Runs on the break path too, so the sentinel is accounted for.
            speech_queue.task_done()

# Launch the speech worker in the background. It is a daemon thread, so it
# will not keep the interpreter alive on its own.
thread = threading.Thread(daemon=True, target=tts_worker)
thread.start()

TTS Response Time for 'fiction section to right': 6.32s
TTS Response Time for '8': 1.51s
TTS Response Time for '0': 0.50s
TTS Response Time for '81': 0.42s
TTS Response Time for '87': 0.07s


In [None]:

# Detections whose easyocr confidence is below this value are still drawn on
# the frame but are neither spoken nor scored. 0.0 keeps every detection;
# raise it to suppress noisy reads.
MIN_CONFIDENCE = 0.0


def _adaptive_bleu(reference_tokens, hypothesis_tokens):
    """Return sentence BLEU using an n-gram order that fits the text length.

    The default 4-gram weights force BLEU to 0 (and make NLTK emit warnings)
    for any text shorter than four tokens, even when reference and hypothesis
    are identical. The order is therefore capped at the shorter of the two
    token lists, with uniform weights across the orders used. Texts of four
    or more tokens get the standard 4-gram BLEU.
    """
    max_order = min(4, len(reference_tokens), len(hypothesis_tokens))
    if max_order == 0:
        return 0.0
    weights = tuple(1.0 / max_order for _ in range(max_order))
    return sentence_bleu([reference_tokens], hypothesis_tokens, weights=weights)


cap = cv2.VideoCapture(0)

try:
    reader = easyocr.Reader(['en'])

    # Set mirror of detected_text_list for constant-time duplicate checks;
    # the list itself keeps first-seen order.
    seen_texts = set(detected_text_list)

    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame could be read (camera unavailable or stream ended).
            break

        results = reader.readtext(frame)

        for (bbox, text, prob) in results:
            # bbox is a list of corner points; index 0 and 2 are used as the
            # opposite corners of the rectangle to draw.
            top_left = tuple(map(int, bbox[0]))
            bottom_right = tuple(map(int, bbox[2]))

            cv2.rectangle(frame, top_left, bottom_right, (0, 255, 0), 2)
            cv2.putText(frame, text, (top_left[0], top_left[1] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

            cleaned_text = text.strip()
            # Only speak/score text that is non-empty, new, and confident enough.
            if not cleaned_text or cleaned_text in seen_texts or prob < MIN_CONFIDENCE:
                continue

            seen_texts.add(cleaned_text)
            detected_text_list.append(cleaned_text)
            print(f"Queueing for speech: {cleaned_text}")
            speech_queue.put(cleaned_text)  # Add text to TTS queue

            # --- Evaluate OCR (if you have ground truth available) ---
            # For demo: assume ground truth = cleaned_text (self-comparison),
            # so CER/WER are 0 and BLEU is 1 until a real reference is supplied.
            ground_truth = cleaned_text
            ocr_output = cleaned_text   # Replace with OCR output if comparing
            cer_score = cer(ground_truth, ocr_output)
            wer_score = wer(ground_truth, ocr_output)
            bleu_score = _adaptive_bleu(ground_truth.split(), ocr_output.split())

            ocr_metrics_list.append({
                "text": cleaned_text,
                "CER": cer_score,
                "WER": wer_score,
                "BLEU": bleu_score
            })

            print(f"OCR Metrics for '{cleaned_text}': CER={cer_score:.2f}, WER={wer_score:.2f}, BLEU={bleu_score:.2f}")

        cv2.imshow("Live OCR with Speech", frame)

        # Press 'q' in the preview window to stop.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # ===============================
    # Cleanup
    # ===============================
    # Runs even if the loop raises or the kernel is interrupted, so the
    # camera and preview window are always released.
    cap.release()
    cv2.destroyAllWindows()
    # The None sentinel tells the TTS worker to exit after it has spoken
    # everything queued before it; join() waits for that to happen.
    speech_queue.put(None)
    thread.join()

# Summarise the session: unique texts first, then both metric collections.
print("\n=== Final Results ===")
print("Detected Texts:")
for position, detected in enumerate(detected_text_list, start=1):
    print(f"{position}. {detected}")

# Both metric lists are printed the same way: a heading, then one record per line.
for heading, records in (
    ("\nOCR Metrics Collected:", ocr_metrics_list),
    ("\nTTS Metrics Collected:", tts_metrics_list),
):
    print(heading)
    for record in records:
        print(record)


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Queueing for speech: fiction section to right
OCR Metrics for 'fiction section to right': CER=0.00, WER=0.00, BLEU=1.00
Queueing for speech: 8
OCR Metrics for '8': CER=0.00, WER=0.00, BLEU=0.00


The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


Queueing for speech: 0
OCR Metrics for '0': CER=0.00, WER=0.00, BLEU=0.00
Queueing for speech: 81
OCR Metrics for '81': CER=0.00, WER=0.00, BLEU=0.00
Queueing for speech: 87
OCR Metrics for '87': CER=0.00, WER=0.00, BLEU=0.00
