In [None]:
!pip install pytesseract

In [None]:
import os
import cv2
import pytesseract
import torch
import numpy as np
import requests
from PIL import Image
from torchvision.models import resnet18
import torchvision.transforms as transforms

import os
# Read the OpenRouter key from the environment only. Writing a placeholder
# into os.environ here would overwrite a real key that was already exported
# and would make the "missing key" checks below unreachable.
# Export OPENROUTER_API_KEY in the shell that launches Jupyter; do not paste
# the key into the notebook, where it would be saved and shared with it.
OPENROUTER_API_KEY = (os.getenv("OPENROUTER_API_KEY") or "").strip() or None

# Sanity report only: the prefix test is a format hint, not a validity check.
if not OPENROUTER_API_KEY:
    print("❌ No API key found")
elif not OPENROUTER_API_KEY.startswith("sk-"):
    print("⚠️ Check API key format")
else:
    print("✅ API key found and seems valid")

# Load the ImageNet-pretrained ResNet-18 once for the whole notebook.
# `pretrained=True` is deprecated since torchvision 0.13 in favour of
# `weights=`; "IMAGENET1K_V1" selects the same checkpoint. Older torchvision
# releases reject the `weights` keyword with a TypeError, so fall back there.
try:
    resnet = resnet18(weights="IMAGENET1K_V1")
except TypeError:
    resnet = resnet18(pretrained=True)

# eval() returns the module itself; assigning the result switches to inference
# mode without echoing the full model repr as the cell's output.
resnet = resnet.eval()

In [None]:
def preprocess_with_resnet(image):
    """Convert the input image to the pixel array consumed by the OCR step.

    Despite the name, no ResNet inference is performed here: a classifier's
    logits cannot alter pixel data, so a forward pass would only add latency
    to every request without changing the returned image.

    Args:
        image: A PIL image or array-like with shape (H, W), (H, W, 3) or
            (H, W, 4).

    Returns:
        A C-contiguous (H, W, 3) numpy array holding the first three channels
        in reversed order (RGB in -> BGR out); a fourth (alpha) channel is
        dropped. Grayscale input is replicated across three channels.
        extract_text converts this array with cv2.COLOR_BGR2GRAY, so the
        reversed order is what it expects for RGB input.
    """
    pixels = np.asarray(image)

    if pixels.ndim == 2:
        # Grayscale: replicate so downstream 3-channel code keeps working
        # instead of failing on a 2-D array.
        pixels = np.stack([pixels] * 3, axis=-1)

    # Take channels 2, 1, 0: the same swap a BGR<->RGB conversion performs.
    # The slice is a negative-stride view, so copy it into contiguous memory.
    return np.ascontiguousarray(pixels[..., 2::-1])


In [None]:
def extract_text(image):
    """Return the text Tesseract reads from a binarised, denoised image.

    Pipeline: preprocess_with_resnet -> grayscale -> Otsu threshold ->
    non-local-means denoising -> pytesseract. The result has leading and
    trailing whitespace removed.
    """
    prepared = preprocess_with_resnet(image)
    grayscale = cv2.cvtColor(prepared, cv2.COLOR_BGR2GRAY)

    # With THRESH_OTSU the threshold is chosen automatically; the 0 is ignored.
    otsu_flags = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    _, binary = cv2.threshold(grayscale, 0, 255, otsu_flags)
    cleaned = cv2.fastNlMeansDenoising(binary, h=10)

    # --oem 3: default OCR engine; --psm 6: treat the page as one text block.
    ocr_output = pytesseract.image_to_string(cleaned, config=r'--oem 3 --psm 6')
    return ocr_output.strip()

In [None]:
def translate_with_openrouter(text, target_language="English",
                              model="mistralai/mistral-7b-instruct:free"):
    """Translate ``text`` through the OpenRouter chat-completions endpoint.

    Every failure is reported as a returned message rather than an exception,
    so a UI callback can display it directly.

    Args:
        text: Source text to translate.
        target_language: Language name inserted into the prompt.
        model: OpenRouter model identifier to query.

    Returns:
        The model's reply on success; otherwise a string describing the
        problem (missing key, network failure, non-200 status, or a response
        body that does not have the expected shape).
    """
    if not OPENROUTER_API_KEY:
        return "🔑 API key missing"

    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "Content-Type": "application/json"
    }

    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": f"Translate the following text to {target_language}:\n\n{text}"
            }
        ],
        "temperature": 0.3
    }

    # Timeouts, DNS failures and refused connections all derive from
    # RequestException; report them the same way as HTTP errors.
    try:
        r = requests.post("https://openrouter.ai/api/v1/chat/completions",
                          headers=headers, json=payload, timeout=60)
    except requests.RequestException as exc:
        return f"Translation error: request failed ({exc})"

    if r.status_code != 200:
        return f"Translation error {r.status_code}: {r.text}"

    # A 200 reply can still carry a non-JSON body or an error object without
    # "choices"; guard the lookup instead of raising.
    try:
        return r.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError):
        return f"Translation error: unexpected response: {r.text}"

def process_image(image, target_language="English"):
    """Run OCR on ``image`` and translate whatever text is found.

    Args:
        image: The uploaded image, or None when nothing was uploaded.
        target_language: Language name forwarded to the translator.

    Returns:
        A ``(extracted_text, translated_text)`` tuple. When there is no image
        or no text is recognised, the first element carries a status message
        and the second is an empty string.
    """
    # Compare against None explicitly: truthiness is not a reliable "missing"
    # test for image objects (array-likes raise on bool(), and a falsy object
    # is still an image).
    if image is None:
        return "No image provided", ""

    extracted = extract_text(image)
    if not extracted:
        return "No text found in image", ""

    translated = translate_with_openrouter(extracted, target_language)
    return extracted, translated

In [None]:
import gradio as gr

# Build the Gradio UI: a two-column layout with the inputs on the left and the
# two result boxes on the right. Components are placed in creation order
# inside the enclosing Row/Column context managers.
with gr.Blocks(title="Image to Text Translator") as demo:
    gr.Markdown("## OCR + Translation using ResNet via OpenRouter")

    with gr.Row():
        # Left column: image upload, target-language selector, trigger button.
        with gr.Column():
            # type="pil" asks Gradio to pass the upload to the callback as a PIL image.
            image_input = gr.Image(label="Upload Image", type="pil")
            language_dropdown = gr.Dropdown(
                choices=["English", "Spanish", "French", "German", "Chinese", "Japanese", "Korean", "Hindi"],
                value="English",
                label="Target Language"
            )
            translate_btn = gr.Button("Translate")
        # Right column: OCR output and its translation.
        with gr.Column():
            extracted_text = gr.Textbox(label="Extracted Text", lines=5)
            translated_text = gr.Textbox(label="Translated Text", lines=5)

    # process_image receives (image, language) in the order listed in `inputs`
    # and its returned 2-tuple fills `outputs` in the same order.
    translate_btn.click(
        fn=process_image,
        inputs=[image_input, language_dropdown],
        outputs=[extracted_text, translated_text]
    )


# Start the app only when executed directly. In a notebook kernel __name__ is
# "__main__", so running this cell launches the interface.
if __name__ == "__main__":
    demo.launch()
