In [1]:
# First step installation
!pip install easyocr
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu


Collecting easyocr
  Downloading easyocr-1.7.2-py3-none-any.whl.metadata (10 kB)
Collecting python-bidi (from easyocr)
  Downloading python_bidi-0.6.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)
Collecting pyclipper (from easyocr)
  Downloading pyclipper-1.3.0.post6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.0 kB)
Collecting ninja (from easyocr)
  Downloading ninja-1.11.1.4-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (5.0 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->easyocr)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->easyocr)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->easyocr)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (

In [2]:
#CHECK VERSION
import easyocr
import torch
import torchvision
import torchaudio

print("\n📦 Installed Versions:")
print(f"EasyOCR version     : {easyocr.__version__}")
print(f"Torch version       : {torch.__version__}")
print(f"Torchvision version : {torchvision.__version__}")
print(f"Torchaudio version  : {torchaudio.__version__}")

import sys
print("Python version:", sys.version)




📦 Installed Versions:
EasyOCR version     : 1.7.2
Torch version       : 2.6.0+cu124
Torchvision version : 0.21.0+cu124
Torchaudio version  : 2.6.0+cu124
Python version: 3.11.13 (main, Jun  4 2025, 08:57:29) [GCC 11.4.0]


In [14]:
!pip install gTTS

Collecting gTTS
  Downloading gTTS-2.5.4-py3-none-any.whl.metadata (4.1 kB)
Collecting click<8.2,>=7.1 (from gTTS)
  Downloading click-8.1.8-py3-none-any.whl.metadata (2.3 kB)
Downloading gTTS-2.5.4-py3-none-any.whl (29 kB)
Downloading click-8.1.8-py3-none-any.whl (98 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/98.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.2/98.2 kB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: click, gTTS
  Attempting uninstall: click
    Found existing installation: click 8.2.1
    Uninstalling click-8.2.1:
      Successfully uninstalled click-8.2.1
Successfully installed click-8.1.8 gTTS-2.5.4


In [17]:
import easyocr
import re
import os
from gtts import gTTS
from IPython.display import Audio

def run_ocr_cleaner(image_path, audio_output="output_audio.mp3", play_audio=False):
    """
    Full OCR pipeline:
    1. OCR using EasyOCR
    2. Regex-based text cleanup
    3. Stats computation
    4. TTS using gTTS (saves audio)

    Parameters:
    - image_path (str): Path to image file
    - audio_output (str): Output audio file path (MP3)
    - play_audio (bool): Whether to play audio inside notebook (Colab compatible)
    """

    if not os.path.exists(image_path):
        print(f"❌ File not found: {image_path}")
        return

    print("🔍 Running OCR...")
    reader = easyocr.Reader(['en'])
    results = reader.readtext(image_path, detail=0)
    raw_text = ' '.join(results)

    # Tokenization
    all_words = re.findall(r'\b\w+\b', raw_text)

    # Define filtering rules
    meaningless_patterns = [
        r'^[a-z]{1,2}$',
        r'^[^aeiou]{2,}$',
        r'[~`!@#$%^&*()_\-+=\[\]{}|:;"<>,.?/\\]',
        r'[a-z]*[^aeiou\s]{3,}[a-z]*',
        r'\d+',
    ]
    combined_pattern = re.compile("|".join(meaningless_patterns), re.IGNORECASE)

    # Filtered output
    cleaned_words = [word for word in all_words if not combined_pattern.fullmatch(word)]
    cleaned_text = ' '.join(cleaned_words)

    # Stats
    total_words = len(all_words)
    retained_words = len(cleaned_words)
    accuracy = round((retained_words / total_words) * 100, 2) if total_words else 0

    # Output results
    print("\n✅ --- Cleaned Extracted Text ---\n")
    print(cleaned_text if cleaned_text else "(No meaningful text detected)")

    print("\n📊 --- Stats ---")
    print(f"Total Words Extracted (Raw OCR) : {total_words}")
    print(f"Meaningful Words Retained       : {retained_words}")
    print(f"Approximate Cleaned Accuracy    : {accuracy}%")

    # Convert to audio using gTTS (for Linux/Colab compatibility)
    if cleaned_text:
        print("\n🔊 Converting cleaned text to audio (gTTS)...")
        tts = gTTS(cleaned_text)
        tts.save(audio_output)
        print(f"🎧 Audio saved to: {audio_output}")

        if play_audio:
            display(Audio(audio_output))
    else:
        print("⚠️ No meaningful text to convert to audio.")

# Example usage:
# run_ocr_cleaner("book_page.jpg", "output_audio.mp3", play_audio=True)


In [18]:
run_ocr_cleaner("book_page1.jpg", "book_page_audio.mp3" , play_audio=True)



🔍 Running OCR...

✅ --- Cleaned Extracted Text ---

who heard the girl sing was man this Tve been there See here hide You she had talent what continued and know embarrassed but the man The was buck teeth Don crime having when they see there any the audience will love you What Open your and those teeth hide_ Besides said may make fortunel about her teeth From that time tohide his advice and forgot wide and Cass Daley about her audience She her movies she that she became top star sang with such hag lecopG her radio Other comedians are now men who had never The William James was speaking develops ten Themeseovesewhen declared avecago maa found abilities Compared what per ofhis latent mental making use small part wrote are halfawake are the human ofour and resources Stating the various sius dives far within his limits possesses powers which fails use You and have such abilities let not waste second like other You are something new this because are not been Never before since the beginning 