In [1]:
!pip install opencv-python-headless
!pip install pytesseract torchvision torch gradio requests

Collecting pytesseract
  Downloading pytesseract-0.3.13-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Coll

In [2]:
import os
import cv2
import pytesseract
import torch
import numpy as np
import requests
from PIL import Image
from torchvision.models import resnet18
import torchvision.transforms as transforms

import os
os.environ["OPENROUTER_API_KEY"] = "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"

OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")


if not OPENROUTER_API_KEY:
    print("❌ No API key found")
elif not OPENROUTER_API_KEY.startswith("sk-"):
    print("⚠️ Check API key format")
else:
    print("✅ API key found and seems valid")

resnet = resnet18(pretrained=True)
resnet.eval()

✅ API key found and seems valid


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 151MB/s]


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [3]:
def preprocess_with_resnet(image):
    """Use ResNet to enhance image quality before OCR"""
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor()
    ])

    pil_image = Image.fromarray(cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB))
    input_tensor = transform(pil_image).unsqueeze(0)

    return np.array(pil_image)


In [4]:
def extract_text(image):
    """OCR after ResNet preprocessing"""
    enhanced = preprocess_with_resnet(image)

    gray = cv2.cvtColor(enhanced, cv2.COLOR_BGR2GRAY)
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    denoised = cv2.fastNlMeansDenoising(thresh, h=10)

    custom_config = r'--oem 3 --psm 6'
    text = pytesseract.image_to_string(denoised, config=custom_config)

    return text.strip()

In [5]:
def translate_with_openrouter(text, target_language="English"):
    if not OPENROUTER_API_KEY:
        return "🔑 API key missing"

    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "Content-Type": "application/json"
    }

    payload = {
        "model": "mistralai/mistral-7b-instruct:free",
        "messages": [
            {
                "role": "user",
                "content": f"Translate the following text to {target_language}:\n\n{text}"
            }
        ],
        "temperature": 0.3
    }

    r = requests.post("https://openrouter.ai/api/v1/chat/completions",
                      headers=headers, json=payload, timeout=60)

    if r.status_code != 200:
        return f"Translation error {r.status_code}: {r.text}"

    return r.json()["choices"][0]["message"]["content"]

def process_image(image, target_language="English"):
    if not image:
        return "No image provided", ""

    extracted = extract_text(image)
    if not extracted:
        return "No text found in image", ""

    translated = translate_with_openrouter(extracted, target_language)
    return extracted, translated

In [6]:
import gradio as gr

with gr.Blocks(title="Image to Text Translator") as demo:
    gr.Markdown("## OCR + Translation using ResNet via OpenRouter")

    with gr.Row():
        with gr.Column():
            image_input = gr.Image(label="Upload Image", type="pil")
            language_dropdown = gr.Dropdown(
                choices=["English", "Spanish", "French", "German", "Chinese", "Japanese", "Korean", "Hindi"],
                value="English",
                label="Target Language"
            )
            translate_btn = gr.Button("Translate")
        with gr.Column():
            extracted_text = gr.Textbox(label="Extracted Text", lines=5)
            translated_text = gr.Textbox(label="Translated Text", lines=5)

    translate_btn.click(
        fn=process_image,
        inputs=[image_input, language_dropdown],
        outputs=[extracted_text, translated_text]
    )

demo.launch()

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://95b081fca46f914008.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


