In [None]:
!pip install opencv-python-headless
!pip install pytesseract torchvision torch gradio requests

In [None]:
!pip install openai python-dotenv

In [22]:
import os
import cv2
import pytesseract
import torch
import numpy as np
import requests
from PIL import Image
import torchvision.transforms as transforms
from dotenv import load_dotenv
from openai import OpenAI

# Read the key from the environment instead of hardcoding it in the notebook.
# load_dotenv() copies entries from a local .env file into os.environ and, by
# default, leaves variables that are already set untouched.
load_dotenv()

OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")

# Report the key status before building the client so a missing or malformed
# key is visible up front.
if not OPENROUTER_API_KEY:
    print("❌ No API key found")
elif not OPENROUTER_API_KEY.startswith("sk-"):
    print("⚠️ Check API key format")
else:
    print("✅ API key found and seems valid")

# The OpenAI client is pointed at OpenRouter's endpoint via base_url.
# The placeholder keeps `client` defined when no key is configured, so this
# cell still runs; calls made with it are expected to be rejected by the API.
client = OpenAI(
    api_key=OPENROUTER_API_KEY or "missing-api-key",
    base_url="https://openrouter.ai/api/v1"
)

✅ API key found and seems valid


In [None]:
def transform_image(image):
    """Convert an input image into a 3-channel array with reversed channel order.

    No resizing is applied; the output has the same height and width as the
    input. The first and third channels are swapped (RGB in -> BGR out), so
    the result can be passed to OpenCV routines that expect BGR data.

    Args:
        image: A PIL image (anything exposing ``convert``) or an array-like of
            shape (H, W), (H, W, 1), (H, W, 3) or (H, W, 4).

    Returns:
        A contiguous ``numpy.ndarray`` of shape (H, W, 3).

    Raises:
        ValueError: If the input cannot be interpreted as a 1-, 3- or
            4-channel image.
    """
    # PIL images expose convert(); normalising to RGB covers palette,
    # grayscale and alpha modes. NumPy arrays have no such attribute.
    if hasattr(image, "convert"):
        image = image.convert("RGB")

    pixels = np.asarray(image)

    if pixels.ndim == 2:
        # Single-channel image: replicate it across three channels.
        pixels = np.stack([pixels] * 3, axis=-1)
    elif pixels.ndim == 3 and pixels.shape[2] == 1:
        pixels = np.repeat(pixels, 3, axis=2)
    elif pixels.ndim == 3 and pixels.shape[2] == 4:
        # Drop the alpha channel.
        pixels = pixels[:, :, :3]

    if pixels.ndim != 3 or pixels.shape[2] != 3:
        raise ValueError(f"Unsupported image shape: {pixels.shape}")

    # Reverse the channel axis and copy, so the caller gets a contiguous,
    # writable array that does not alias the input.
    return np.ascontiguousarray(pixels[:, :, ::-1])


In [None]:
def extract_text(image):
    """Run Tesseract OCR on an image and return the recognised text.

    The image is passed through ``transform_image``, converted to grayscale,
    binarised with Otsu's threshold, denoised with non-local means (applied
    after binarisation), and then handed to Tesseract.

    Returns:
        The OCR output with leading and trailing whitespace removed.
    """
    prepared = transform_image(image)
    grayscale = cv2.cvtColor(prepared, cv2.COLOR_BGR2GRAY)

    # cv2.threshold returns (threshold_used, image); only the image is needed.
    _, binary = cv2.threshold(
        grayscale, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )
    cleaned = cv2.fastNlMeansDenoising(binary, h=10)

    # --oem 3: default OCR engine mode; --psm 6: assume a single uniform
    # block of text.
    ocr_output = pytesseract.image_to_string(cleaned, config=r'--oem 3 --psm 6')
    return ocr_output.strip()

In [None]:
def translate_with_openrouter_chatgptmodel(text, target_language="English"):
    """Translate ``text`` into ``target_language`` through the chat completions API.

    Uses the module-level ``client`` with the ``openai/gpt-4o`` model.

    Args:
        text: The text to translate.
        target_language: Name of the language to translate into.

    Returns:
        The translated text. Returns an empty string when ``text`` is empty or
        only whitespace (no API call is made). On any failure, returns a
        string starting with "❌ Translation error:" instead of raising.
    """
    # Nothing to translate: skip the network round trip entirely.
    if not text or not text.strip():
        return ""

    try:
        response = client.chat.completions.create(
            model="openai/gpt-4o",
            messages=[
                {
                    "role": "user",
                    "content": f"Translate the following text to {target_language}:\n\n{text}"
                }
            ],
            temperature=0.3,
            max_tokens=500
        )
        content = response.choices[0].message.content
    except Exception as e:
        # Best-effort by design: the error is surfaced as text to the caller.
        return f"❌ Translation error: {e}"

    # The message content may be None; keep the return type a string.
    if content is None:
        return "❌ Translation error: model returned no content"
    return content


In [None]:
def process_image(image, target_language="English"):
    """Extract text from an image and translate it.

    Args:
        image: The image to process, or ``None`` when nothing was provided.
        target_language: Name of the language to translate into.

    Returns:
        A ``(extracted_text, translated_text)`` tuple. When no image is given
        or no text is found, the first element is a status message and the
        second is an empty string.
    """
    # Identity check rather than truthiness: bool() of a multi-element NumPy
    # array raises ValueError, and image objects are not meaningfully falsy.
    if image is None:
        return "No image provided", ""

    extracted = extract_text(image)
    if not extracted:
        return "No text found in image", ""

    translated = translate_with_openrouter_chatgptmodel(extracted, target_language)
    return extracted, translated

In [None]:
import gradio as gr

# Languages offered in the target-language dropdown.
LANGUAGE_CHOICES = [
    "English", "Spanish", "French", "German",
    "Chinese", "Japanese", "Korean", "Hindi",
]

with gr.Blocks(title="Image to Text Translator") as demo:
    gr.Markdown("## OCR + Translation using OpenCV (via OpenRouter)")

    with gr.Row():
        # Left column: inputs and the trigger button.
        with gr.Column():
            image_input = gr.Image(type="pil", label="Upload Image")
            language_dropdown = gr.Dropdown(
                label="Target Language",
                choices=LANGUAGE_CHOICES,
                value="English",
            )
            translate_btn = gr.Button("Translate")

        # Right column: OCR result and its translation.
        with gr.Column():
            extracted_text = gr.Textbox(lines=5, label="Extracted Text")
            translated_text = gr.Textbox(lines=5, label="Translated Text")

    # process_image returns (extracted, translated), matching the two outputs.
    pipeline_inputs = [image_input, language_dropdown]
    pipeline_outputs = [extracted_text, translated_text]
    translate_btn.click(
        fn=process_image,
        inputs=pipeline_inputs,
        outputs=pipeline_outputs,
    )

demo.launch()