In [None]:
# Install dependencies
!pip install easyocr opencv-python numpy matplotlib

import cv2
import numpy as np
import easyocr
import matplotlib.pyplot as plt
import os
from google.colab.patches import cv2_imshow

# Initialize EasyOCR reader
# Built once with the English ('en') language list and kept as a module-level
# global; extract_text_regions() below calls reader.readtext() on it.
reader = easyocr.Reader(['en'])

def extract_text_regions(image_path):
    """Run OCR on an image file and return each detection's box and text.

    Args:
        image_path: Path of the image file, loaded with ``cv2.imread``.

    Returns:
        A list of ``((x1, y1, x2, y2), text)`` tuples, one per EasyOCR
        detection. ``(x1, y1)`` is the first corner and ``(x2, y2)`` the third
        corner of the detected quadrilateral, truncated to ints. An empty list
        is returned when the image cannot be read.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # rather than raising; stop here instead of handing None to the OCR.
        print(f"Error: Unable to read image at {image_path}")
        return []
    results = reader.readtext(img, detail=1)
    # Each result is (bbox, text, confidence); the confidence is not used.
    return [
        ((int(bbox[0][0]), int(bbox[0][1]), int(bbox[2][0]), int(bbox[2][1])), text)
        for bbox, text, _confidence in results
    ]

def sort_text_by_columns(text_data):
    """Return the texts in reading order without modifying ``text_data``.

    Despite the function name, entries are ordered row-major: first by the
    box's ``y1`` (second coordinate), then by its ``x1`` (first coordinate).

    Args:
        text_data: Iterable of ``((x1, y1, x2, y2), text)`` tuples.

    Returns:
        A new list containing only the text strings, in sorted order.
    """
    # sorted() builds a new list, so the caller's list keeps its original
    # order (list.sort() would reorder it in place as a hidden side effect).
    ordered = sorted(text_data, key=lambda entry: (entry[0][1], entry[0][0]))
    return [text for _, text in ordered]

def get_comic_text(image_path):
    """OCR the image at ``image_path`` and return its text strings in sorted order.

    Thin composition of ``extract_text_regions`` (detection) and
    ``sort_text_by_columns`` (ordering).
    """
    return sort_text_by_columns(extract_text_regions(image_path))

def find_horizontal_gaps(binary_image):
    """Locate the rows where an all-white horizontal band begins.

    A row ``y`` is reported when every pixel in it equals 255 while the row
    directly above it contains at least one non-255 pixel. Only rows
    ``1 .. height - 2`` are examined.

    Args:
        binary_image: 2-D array of pixel values (white is 255).

    Returns:
        ``[0, <band start rows...>, height]`` as a list of ints.
    """
    height, _ = binary_image.shape
    # One boolean per row: True when the whole row is white.
    row_is_white = np.all(binary_image == 255, axis=1)
    # Compare each interior row with the row above it in a single vector op.
    band_starts = np.flatnonzero(row_is_white[1:-1] & ~row_is_white[:-2]) + 1
    return [0] + band_starts.tolist() + [height]

def find_vertical_gaps(binary_image, horizontal_lines):
    """Locate the columns where an all-white vertical band begins in any row band.

    The image is examined one horizontal band at a time (consecutive pairs of
    ``horizontal_lines``). Column ``x`` is reported when, inside at least one
    band, column ``x`` is entirely 255 while column ``x - 1`` is not. Only
    columns ``1 .. width - 2`` are considered.

    Each column is reported once, even if it starts a gap in several bands.
    Reporting it once per band would put duplicate cut positions in the
    result and produce zero-width crops downstream.

    Args:
        binary_image: 2-D array of pixel values (white is 255).
        horizontal_lines: Ascending row cut positions, e.g. the output of
            ``find_horizontal_gaps``.

    Returns:
        ``[0, <gap start columns...>, width]`` as a sorted list of ints.
    """
    _, width = binary_image.shape
    starts_gap = np.zeros(width, dtype=bool)
    for y1, y2 in zip(horizontal_lines, horizontal_lines[1:]):
        # Whiteness of every column inside this band, computed once.
        col_is_white = np.all(binary_image[y1:y2, :] == 255, axis=0)
        # Column x starts a gap when it is white and column x - 1 is not.
        starts_gap[1:] |= col_is_white[1:] & ~col_is_white[:-1]
    # The last column is excluded, matching the 1 .. width - 2 scan range.
    interior = np.flatnonzero(starts_gap[1:width - 1]) + 1
    return [0] + interior.tolist() + [width]

def extract_sub_images(image, horizontal_lines, vertical_lines, base_path, min_size=10):
    """Crop the grid cells defined by the cut lines and save each as a JPEG.

    Args:
        image: Image array indexed as ``image[y, x]``.
        horizontal_lines: Ascending row cut positions.
        vertical_lines: Ascending column cut positions.
        base_path: Directory for the crops; created if it does not exist.
        min_size: Crops whose height or width is ``min_size`` pixels or less
            are skipped. Defaults to 10.

    Returns:
        Paths of the files that were actually written, in row-major
        ``(i, j)`` order. Files are named ``sub_img_{i}_{j}.jpg``.
    """
    os.makedirs(base_path, exist_ok=True)
    sub_images = []
    for i, (y1, y2) in enumerate(zip(horizontal_lines, horizontal_lines[1:])):
        for j, (x1, x2) in enumerate(zip(vertical_lines, vertical_lines[1:])):
            sub_img = image[y1:y2, x1:x2]
            if sub_img.shape[0] <= min_size or sub_img.shape[1] <= min_size:
                continue  # gutter sliver or empty slice, not a panel
            sub_img_path = os.path.join(base_path, f'sub_img_{i}_{j}.jpg')
            # cv2.imwrite returns False when the file could not be written;
            # only report paths that exist so later reads do not fail.
            if cv2.imwrite(sub_img_path, sub_img):
                sub_images.append(sub_img_path)
            else:
                print(f"Warning: could not write {sub_img_path}")
    return sub_images

def recursive_panel_detection(image_path):
    """Split a comic page into panel crops saved under ``sub_images``.

    Despite the name, this is a single pass with no recursion: the page is
    binarised, cut lines are found along both axes, and every grid cell is
    written out by ``extract_sub_images``.

    Args:
        image_path: Path of the page image.

    Returns:
        The list of crop file paths, or ``[]`` (after printing an error) when
        the image cannot be read.
    """
    page = cv2.imread(image_path)
    if page is None:
        print(f"Error: Unable to read image at {image_path}")
        return []

    # Grayscale pixels above 200 become 255 (treated as white gutter), the rest 0.
    white_mask = cv2.threshold(
        cv2.cvtColor(page, cv2.COLOR_BGR2GRAY), 200, 255, cv2.THRESH_BINARY
    )[1]
    row_cuts = find_horizontal_gaps(white_mask)
    col_cuts = find_vertical_gaps(white_mask, row_cuts)
    return extract_sub_images(page, row_cuts, col_cuts, 'sub_images')

def process_comic_image(image_path):
    """Detect the panels of a comic page and OCR each one.

    Args:
        image_path: Path of the page image.

    Returns:
        A list with one entry per saved panel crop; each entry is the list of
        text strings returned by ``get_comic_text`` for that crop.
    """
    extracted_texts = []
    for panel_path in recursive_panel_detection(image_path):
        extracted_texts.append(get_comic_text(panel_path))
    return extracted_texts

# Example usage: OCR every detected panel of one page and list the text per panel
image_path = "Comic3.jpg"  # Replace with actual image path
text_output = process_comic_image(image_path)
for idx in range(len(text_output)):
    print(f"Panel {idx+1}:")
    for line in text_output[idx]:
        print(f"  {line}")


Collecting easyocr
  Downloading easyocr-1.7.2-py3-none-any.whl.metadata (10 kB)
Collecting python-bidi (from easyocr)
  Downloading python_bidi-0.6.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)
Collecting pyclipper (from easyocr)
  Downloading pyclipper-1.3.0.post6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.0 kB)
Collecting ninja (from easyocr)
  Downloading ninja-1.11.1.4-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (5.0 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->easyocr)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->easyocr)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->easyocr)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (



Progress: |██████████████████████████████████████████████████| 100.0% Complete



Progress: |--------------------------------------------------| 0.0% CompleteProgress: |--------------------------------------------------| 0.1% CompleteProgress: |--------------------------------------------------| 0.1% CompleteProgress: |--------------------------------------------------| 0.2% CompleteProgress: |--------------------------------------------------| 0.2% CompleteProgress: |--------------------------------------------------| 0.3% CompleteProgress: |--------------------------------------------------| 0.4% CompleteProgress: |--------------------------------------------------| 0.4% CompleteProgress: |--------------------------------------------------| 0.5% CompleteProgress: |--------------------------------------------------| 0.5% CompleteProgress: |--------------------------------------------------| 0.6% CompleteProgress: |--------------------------------------------------| 0.6% CompleteProgress: |--------------------------------------------------| 0.7% Complet

In [None]:
# Install dependencies
!pip install easyocr opencv-python numpy matplotlib gTTS

import cv2
import numpy as np
import easyocr
import matplotlib.pyplot as plt
import os
from gtts import gTTS
from google.colab.patches import cv2_imshow

# Initialize EasyOCR reader
# Built once with the English ('en') language list and kept as a module-level
# global; extract_text_regions() below calls reader.readtext() on it.
reader = easyocr.Reader(['en'])

# === OCR Functions ===
def extract_text_regions(image_path):
    """Run OCR on an image file and return each detection's box and text.

    Args:
        image_path: Path of the image file, loaded with ``cv2.imread``.

    Returns:
        A list of ``((x1, y1, x2, y2), text)`` tuples, one per EasyOCR
        detection. ``(x1, y1)`` is the first corner and ``(x2, y2)`` the third
        corner of the detected quadrilateral, truncated to ints. An empty list
        is returned when the image cannot be read.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # rather than raising; stop here instead of handing None to the OCR.
        print(f"Error: Unable to read image at {image_path}")
        return []
    results = reader.readtext(img, detail=1)
    # Each result is (bbox, text, confidence); the confidence is not used.
    return [
        ((int(bbox[0][0]), int(bbox[0][1]), int(bbox[2][0]), int(bbox[2][1])), text)
        for bbox, text, _confidence in results
    ]

def sort_text_by_columns(text_data):
    """Return the texts in reading order without modifying ``text_data``.

    Despite the function name, entries are ordered row-major: first by the
    box's ``y1`` (second coordinate), then by its ``x1`` (first coordinate).

    Args:
        text_data: Iterable of ``((x1, y1, x2, y2), text)`` tuples.

    Returns:
        A new list containing only the text strings, in sorted order.
    """
    # sorted() builds a new list, so the caller's list keeps its original
    # order (list.sort() would reorder it in place as a hidden side effect).
    ordered = sorted(text_data, key=lambda entry: (entry[0][1], entry[0][0]))
    return [text for _, text in ordered]

def get_comic_text(image_path):
    """OCR the image at ``image_path`` and return its text strings in sorted order.

    Thin composition of ``extract_text_regions`` (detection) and
    ``sort_text_by_columns`` (ordering).
    """
    return sort_text_by_columns(extract_text_regions(image_path))

# === Panel Detection ===
def find_horizontal_gaps(binary_image):
    """Locate the rows where an all-white horizontal band begins.

    A row ``y`` is reported when every pixel in it equals 255 while the row
    directly above it contains at least one non-255 pixel. Only rows
    ``1 .. height - 2`` are examined.

    Args:
        binary_image: 2-D array of pixel values (white is 255).

    Returns:
        ``[0, <band start rows...>, height]`` as a list of ints.
    """
    height, _ = binary_image.shape
    # One boolean per row: True when the whole row is white.
    row_is_white = np.all(binary_image == 255, axis=1)
    # Compare each interior row with the row above it in a single vector op.
    band_starts = np.flatnonzero(row_is_white[1:-1] & ~row_is_white[:-2]) + 1
    return [0] + band_starts.tolist() + [height]

def find_vertical_gaps(binary_image, horizontal_lines):
    """Locate the columns where an all-white vertical band begins in any row band.

    The image is examined one horizontal band at a time (consecutive pairs of
    ``horizontal_lines``). Column ``x`` is reported when, inside at least one
    band, column ``x`` is entirely 255 while column ``x - 1`` is not. Only
    columns ``1 .. width - 2`` are considered.

    Each column is reported once, even if it starts a gap in several bands.
    Reporting it once per band would put duplicate cut positions in the
    result and produce zero-width crops downstream.

    Args:
        binary_image: 2-D array of pixel values (white is 255).
        horizontal_lines: Ascending row cut positions, e.g. the output of
            ``find_horizontal_gaps``.

    Returns:
        ``[0, <gap start columns...>, width]`` as a sorted list of ints.
    """
    _, width = binary_image.shape
    starts_gap = np.zeros(width, dtype=bool)
    for y1, y2 in zip(horizontal_lines, horizontal_lines[1:]):
        # Whiteness of every column inside this band, computed once.
        col_is_white = np.all(binary_image[y1:y2, :] == 255, axis=0)
        # Column x starts a gap when it is white and column x - 1 is not.
        starts_gap[1:] |= col_is_white[1:] & ~col_is_white[:-1]
    # The last column is excluded, matching the 1 .. width - 2 scan range.
    interior = np.flatnonzero(starts_gap[1:width - 1]) + 1
    return [0] + interior.tolist() + [width]

def extract_sub_images(image, horizontal_lines, vertical_lines, base_path, min_size=10):
    """Crop the grid cells defined by the cut lines and save each as a JPEG.

    Args:
        image: Image array indexed as ``image[y, x]``.
        horizontal_lines: Ascending row cut positions.
        vertical_lines: Ascending column cut positions.
        base_path: Directory for the crops; created if it does not exist.
        min_size: Crops whose height or width is ``min_size`` pixels or less
            are skipped. Defaults to 10.

    Returns:
        Paths of the files that were actually written, in row-major
        ``(i, j)`` order. Files are named ``sub_img_{i}_{j}.jpg``.
    """
    os.makedirs(base_path, exist_ok=True)
    sub_images = []
    for i, (y1, y2) in enumerate(zip(horizontal_lines, horizontal_lines[1:])):
        for j, (x1, x2) in enumerate(zip(vertical_lines, vertical_lines[1:])):
            sub_img = image[y1:y2, x1:x2]
            if sub_img.shape[0] <= min_size or sub_img.shape[1] <= min_size:
                continue  # gutter sliver or empty slice, not a panel
            sub_img_path = os.path.join(base_path, f'sub_img_{i}_{j}.jpg')
            # cv2.imwrite returns False when the file could not be written;
            # only report paths that exist so later reads do not fail.
            if cv2.imwrite(sub_img_path, sub_img):
                sub_images.append(sub_img_path)
            else:
                print(f"Warning: could not write {sub_img_path}")
    return sub_images

def recursive_panel_detection(image_path):
    """Split a comic page into panel crops saved under ``sub_images``.

    Despite the name, this is a single pass with no recursion: the page is
    binarised, cut lines are found along both axes, and every grid cell is
    written out by ``extract_sub_images``.

    Args:
        image_path: Path of the page image.

    Returns:
        The list of crop file paths, or ``[]`` (after printing an error) when
        the image cannot be read.
    """
    page = cv2.imread(image_path)
    if page is None:
        print(f"Error: Unable to read image at {image_path}")
        return []

    # Grayscale pixels above 200 become 255 (treated as white gutter), the rest 0.
    white_mask = cv2.threshold(
        cv2.cvtColor(page, cv2.COLOR_BGR2GRAY), 200, 255, cv2.THRESH_BINARY
    )[1]
    row_cuts = find_horizontal_gaps(white_mask)
    col_cuts = find_vertical_gaps(white_mask, row_cuts)
    return extract_sub_images(page, row_cuts, col_cuts, 'sub_images')

# === Voice Conversion ===
def convert_panels_to_speech(panel_texts, output_dir='panel_audio'):
    """Synthesize one English MP3 per panel that contains text.

    Args:
        panel_texts: Sequence of panels, each a list of text strings.
        output_dir: Directory for the MP3 files; created if missing.

    Panel ``i`` (0-based) is written to ``panel_{i+1}.mp3``. Panels whose
    joined text is empty are skipped, so file numbers may have gaps.
    """
    os.makedirs(output_dir, exist_ok=True)
    for i, lines in enumerate(panel_texts):
        spoken = ' '.join(lines).strip()
        if not spoken:
            print(f"⚠️ Panel {i+1} has no text. Skipping.")
            continue
        filename = os.path.join(output_dir, f'panel_{i+1}.mp3')
        gTTS(text=spoken, lang='en').save(filename)
        print(f"🔊 Saved: {filename}")

# === Main Process ===
def process_comic_image(image_path):
    """Run the full pipeline on one page: panels, OCR, console dump, audio.

    Args:
        image_path: Path of the page image.

    The recognised text of every panel is printed, then handed to
    ``convert_panels_to_speech``. Nothing is returned.
    """
    extracted_texts = []
    for panel_path in recursive_panel_detection(image_path):
        extracted_texts.append(get_comic_text(panel_path))

    # Console dump, one numbered section per panel
    for idx in range(len(extracted_texts)):
        print(f"\n🖼️ Panel {idx+1}:")
        for line in extracted_texts[idx]:
            print(f"  {line}")

    # Audio rendering of the same texts
    convert_panels_to_speech(extracted_texts)

# === Usage ===
# Runs the whole pipeline on one page: panel detection, OCR, console output and
# per-panel MP3 generation (all driven by process_comic_image above).
image_path = "Comic3.jpg"  # Replace with the path of your comic page image
process_comic_image(image_path)


Collecting easyocr
  Downloading easyocr-1.7.2-py3-none-any.whl.metadata (10 kB)
Collecting gTTS
  Downloading gTTS-2.5.4-py3-none-any.whl.metadata (4.1 kB)
Collecting python-bidi (from easyocr)
  Downloading python_bidi-0.6.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)
Collecting pyclipper (from easyocr)
  Downloading pyclipper-1.3.0.post6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.0 kB)
Collecting ninja (from easyocr)
  Downloading ninja-1.11.1.4-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (5.0 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->easyocr)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->easyocr)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->easyocr)
  Downloading 



Progress: |██████████████████████████████████████████████████| 100.0% Complete



Progress: |--------------------------------------------------| 0.0% CompleteProgress: |--------------------------------------------------| 0.1% CompleteProgress: |--------------------------------------------------| 0.1% CompleteProgress: |--------------------------------------------------| 0.2% CompleteProgress: |--------------------------------------------------| 0.2% CompleteProgress: |--------------------------------------------------| 0.3% CompleteProgress: |--------------------------------------------------| 0.4% CompleteProgress: |--------------------------------------------------| 0.4% CompleteProgress: |--------------------------------------------------| 0.5% CompleteProgress: |--------------------------------------------------| 0.5% CompleteProgress: |--------------------------------------------------| 0.6% CompleteProgress: |--------------------------------------------------| 0.6% CompleteProgress: |--------------------------------------------------| 0.7% Complet

In [None]:
# Install pydub if not already
!pip install pydub

from pydub import AudioSegment
import os

def combine_panel_audios(input_dir='panel_audio', combined_filename='combined_comic_audio.mp3', pause_ms=500):
    """Concatenate the ``panel_<n>.mp3`` files of a directory into one MP3.

    Args:
        input_dir: Directory holding the per-panel MP3 files; the combined
            file is written into the same directory.
        combined_filename: File name of the combined MP3.
        pause_ms: Silence inserted between consecutive panels, in
            milliseconds. Defaults to 500.

    Files are joined in ascending order of ``<n>``. Only names of the exact
    form ``panel_<digits>.mp3`` are used; anything else in the directory is
    ignored rather than breaking the numeric sort. When no such file exists,
    a message is printed and nothing is exported.
    """
    import re  # local import so this block does not depend on the cell's import list

    panel_pattern = re.compile(r'panel_(\d+)\.mp3')
    numbered_files = []
    for name in os.listdir(input_dir):
        match = panel_pattern.fullmatch(name)
        if match:
            numbered_files.append((int(match.group(1)), name))
    numbered_files.sort()

    if not numbered_files:
        print(f"No panel_<n>.mp3 files found in {input_dir}; nothing to combine.")
        return

    combined_audio = AudioSegment.empty()
    for position, (_, file) in enumerate(numbered_files):
        if position > 0:
            # Pause only between panels, so the result has no trailing silence.
            combined_audio += AudioSegment.silent(duration=pause_ms)
        combined_audio += AudioSegment.from_mp3(os.path.join(input_dir, file))
        print(f"Added: {file}")

    combined_path = os.path.join(input_dir, combined_filename)
    combined_audio.export(combined_path, format='mp3')
    print(f"\n✅ Combined audio saved at: {combined_path}")

# Merge the per-panel MP3s using the defaults: files are read from, and the
# combined file is written to, the 'panel_audio' directory.
combine_panel_audios()

Added: panel_1.mp3
Added: panel_2.mp3
Added: panel_3.mp3
Added: panel_4.mp3
Added: panel_5.mp3
Added: panel_6.mp3
Added: panel_7.mp3
Added: panel_8.mp3

✅ Combined audio saved at: panel_audio/combined_comic_audio.mp3
