<a href="https://colab.research.google.com/github/Dhruv-5903/AWP.gg-Executor-Roblox/blob/master/live_chat_pdf.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install pytesseract
!pip install langdetect





In [2]:
import pytesseract
from PIL import Image
import ipywidgets as widgets
from IPython.display import display, clear_output
from langdetect import detect
from transformers import MarianMTModel, MarianTokenizer
import os


In [3]:
# Folder for storing uploaded images
os.makedirs("chat_images", exist_ok=True)

# Setup for OCR (Tesseract) and translation (MarianMT)
def extract_text_from_image(image_path):
    """Run Tesseract OCR on an image file and return the recognised text.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The OCR output with leading/trailing whitespace removed (may be empty).
    """
    # Open through a context manager so the underlying file handle is released
    # as soon as OCR is done instead of lingering until garbage collection.
    with Image.open(image_path) as img:
        # Recognizes Hindi, Gujarati, English
        text = pytesseract.image_to_string(img, lang='hin+guj+eng')
    return text.strip()

def detect_language(text):
    """Return the langdetect language code for ``text``.

    Args:
        text: Text whose language should be identified.

    Returns:
        The code reported by ``langdetect.detect``. Blank input (None, empty
        or whitespace only) is reported as ``'en'`` so that callers which
        translate "everything that is not English" simply skip translation
        instead of failing when OCR found no text.
    """
    # langdetect has nothing to work with on blank input, so short-circuit
    # before calling it.
    if not text or not text.strip():
        return 'en'
    return detect(text)

# Loaded (tokenizer, model) pairs keyed by model name, so repeated translations
# do not reload the weights on every call.
_MARIAN_CACHE = {}


def _load_marian(model_name):
    """Return the (tokenizer, model) pair for ``model_name``, loading it at most once."""
    if model_name not in _MARIAN_CACHE:
        _MARIAN_CACHE[model_name] = (
            MarianTokenizer.from_pretrained(model_name),
            MarianMTModel.from_pretrained(model_name),
        )
    return _MARIAN_CACHE[model_name]


def translate_text(text, src_lang='hi', tgt_lang='en'):
    """Translate ``text`` with the Helsinki-NLP MarianMT model for the language pair.

    Args:
        text: Text to translate.
        src_lang: Source language code used in the model name (default 'hi').
        tgt_lang: Target language code used in the model name (default 'en').

    Returns:
        The decoded translation, or "" when ``text`` is blank.

    Raises:
        Whatever ``from_pretrained`` raises when no model exists for the
        requested ``src_lang``/``tgt_lang`` pair.
    """
    # Nothing to translate: avoid loading a model just to decode noise.
    if not text or not text.strip():
        return ""

    tokenizer, model = _load_marian(f"Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}")
    # truncation=True keeps long OCR output within the model's maximum input length.
    batch = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    translated = model.generate(**batch)
    return tokenizer.decode(translated[0], skip_special_tokens=True)


In [4]:
# Buttons to send images and text
send_A_image = widgets.Button(description="Send Image A")
send_B_image = widgets.Button(description="Send Image B")
send_A_text = widgets.Button(description="Send Text A")
send_B_text = widgets.Button(description="Send Text B")

# Function to handle sending of images and text messages
def handle_upload(user, uploader, text_message=None):
    """Save the image uploaded by ``user``, OCR/translate it and log it to the chat.

    Args:
        user: Sender id ("A" or "B").
        uploader: FileUpload widget whose ``value`` maps file names to dicts
            holding ``'metadata'`` (with ``'name'``) and ``'content'`` bytes.
        text_message: Optional text sent together with the image.

    Does nothing when no file has been uploaded. Otherwise the file is written
    under ``chat_images/``, one entry is appended to ``chat_log`` and the chat
    view is refreshed.
    """
    if not uploader.value:
        return

    uploaded_file = next(iter(uploader.value.values()))
    # Keep only the last path component of the client-supplied name so an
    # upload can never be written outside chat_images (e.g. "../x.png").
    safe_name = os.path.basename(uploaded_file['metadata']['name'])
    filepath = os.path.join("chat_images", f"{user}_{safe_name}")
    with open(filepath, 'wb') as f:
        f.write(uploaded_file['content'])

    # Images sent by User A are logged without a translation, so the OCR and
    # translation work is only done for other senders.
    translated_text = None
    if user != "A":
        extracted_text = extract_text_from_image(filepath)
        if extracted_text:
            detected_lang = detect_language(extracted_text)
            if detected_lang != 'en':
                translated_text = translate_text(extracted_text, src_lang=detected_lang)
            else:
                translated_text = extracted_text
        # else: OCR found nothing, so there is no language to detect or translate.

    chat_log.append({
        "sender": user,
        "image_path": filepath,
        "translated_text": translated_text,
        "text_message": text_message,
    })

    uploader.value.clear()
    update_chat()

# Handle text message submissions
def handle_text_message(user, text_area):
    """Append the text typed by ``user`` to the chat log and refresh the view.

    Whitespace-only input is ignored. After a successful send the text area
    is emptied.
    """
    body = text_area.value.strip()
    if not body:
        return

    entry = {"sender": user, "image_path": None, "translated_text": None, "text_message": body}
    chat_log.append(entry)
    # Reset the input box once the message is logged.
    text_area.value = ""
    update_chat()


In [5]:
# Attach event handlers to buttons
# NOTE(review): these lambdas resolve upload_A/upload_B/text_A/text_B only when a
# button is clicked, but those widgets are first created further down (cell In [10]).
# That later cell also re-creates the four send buttons and redefines
# handle_upload/handle_text_message with an additional `receiver` argument, so the
# handlers registered here stay attached to Button objects that no visible cell
# displays and they use the older call signature.
send_A_image.on_click(lambda x: handle_upload("A", upload_A, text_A.value))
send_B_image.on_click(lambda x: handle_upload("B", upload_B, text_B.value))
send_A_text.on_click(lambda x: handle_text_message("A", text_A))
send_B_text.on_click(lambda x: handle_text_message("B", text_B))


In [6]:
# Shared chat log to store messages
chat_log = []

# Update the chat UI for both users
def update_chat():
    """Redraw the whole conversation inside the shared ``output`` widget.

    Each ``chat_log`` entry is shown as a 200x200 thumbnail (when it has an
    image), followed by its translation and text message when present.
    NOTE(review): ``output`` is not created in this cell; it must exist in
    the notebook namespace before this function is called.
    """
    with output:
        clear_output()
        for msg in chat_log:
            if msg["image_path"]:
                # Close the source file right after building the thumbnail;
                # the log is re-read on every redraw, so unclosed handles
                # would otherwise pile up.
                with Image.open(msg["image_path"]) as img:
                    thumbnail = img.resize((200, 200))
                display(thumbnail)
                print(f"{msg['sender']} sent an image.")
                if msg.get('translated_text'):
                    print(f"🌐 Translated Text (Visible only to the receiver): {msg['translated_text']}")
            if msg["text_message"]:
                print(f"{msg['sender']} says: {msg['text_message']}")


In [7]:
from IPython.display import display, HTML

# Helper function to format messages in chat bubbles
def format_message(sender, text_message, image_path, translated_text):
    """Build the HTML for one chat entry (text bubble, image, translation).

    Args:
        sender: "A" puts the entry on the left with a green bubble; any other
            value puts it on the right with a grey bubble.
        text_message: Message text, or None/"" when the entry has no text.
        image_path: Value used as the ``<img>`` source, or None.
        translated_text: Translation to show; it is only rendered for
            right-aligned entries (sender other than "A").

    Returns:
        An HTML fragment for the entry (empty when the entry has no content).
    """
    from html import escape  # stdlib; imported locally so the cell's import line is untouched

    if sender == "A":
        side = "left"  # User A's messages on the left
        bubble_color = "#DCF8C6"  # Light greenish for User A
    else:
        side = "right"  # User B's messages on the right
        bubble_color = "#E5E5EA"  # Light grey for User B

    message_html = ""

    # Only draw a text bubble when there is text: image-only entries carry
    # text_message=None, which would otherwise render as the word "None".
    # The text is escaped so characters such as < or & display literally.
    if text_message:
        message_html += f"""
    <div style="display: flex; justify-content: {side}; margin-bottom: 10px;">
        <div style="max-width: 70%; word-wrap: break-word; background-color: {bubble_color}; padding: 10px; border-radius: 10px;">
            <p style="margin: 0; font-size: 14px;">{escape(str(text_message))}</p>
        </div>
    </div>
    """

    # Add image if available
    if image_path:
        message_html += f"""
        <div style="display: flex; justify-content: {side}; margin-bottom: 10px;">
            <img src="{escape(str(image_path))}" width="200px" style="border-radius: 10px;">
        </div>
        """

    # Show translated text only for the receiver
    if translated_text and side == "right":
        message_html += f"""
        <div style="display: flex; justify-content: {side}; margin-bottom: 10px;">
            <div style="max-width: 70%; word-wrap: break-word; background-color: #E1FFC7; padding: 10px; border-radius: 10px;">
                <p style="margin: 0; font-size: 14px; color: green;">{escape(str(translated_text))}</p>
            </div>
        </div>
        """

    return message_html


In [8]:
# Shared chat log to store messages
chat_log = []

# Update the chat UI for both users
def update_chat():
    """Render every ``chat_log`` entry as a bubble inside one scrollable box."""
    # One HTML fragment per logged message, in log order.
    bubbles = [
        format_message(entry["sender"], entry["text_message"], entry["image_path"], entry["translated_text"])
        for entry in chat_log
    ]
    chat_html = "".join(bubbles)

    # Wrap the fragments in a fixed-height, scrollable container and show it.
    display(HTML(f"""
    <div style="width: 100%; height: 500px; overflow-y: scroll; padding: 10px; border: 1px solid #ddd;">
        {chat_html}
    </div>
    """))


In [9]:
# Function to handle sending of images and text messages
def handle_upload(user, uploader, text_message=None):
    """Save the image uploaded by ``user``, OCR/translate it and log it to the chat.

    Args:
        user: Sender id ("A" or "B").
        uploader: FileUpload widget whose ``value`` maps file names to dicts
            holding ``'metadata'`` (with ``'name'``) and ``'content'`` bytes.
        text_message: Optional text sent together with the image.

    Does nothing when no file has been uploaded. Otherwise the file is written
    under ``chat_images/``, one entry is appended to ``chat_log`` and the chat
    view is refreshed.
    """
    if not uploader.value:
        return

    uploaded_file = next(iter(uploader.value.values()))
    # Keep only the last path component of the client-supplied name so an
    # upload can never be written outside chat_images (e.g. "../x.png").
    safe_name = os.path.basename(uploaded_file['metadata']['name'])
    filepath = os.path.join("chat_images", f"{user}_{safe_name}")
    with open(filepath, 'wb') as f:
        f.write(uploaded_file['content'])

    # Images sent by User A are logged without a translation, so the OCR and
    # translation work is only done for other senders.
    translated_text = None
    if user != "A":
        extracted_text = extract_text_from_image(filepath)
        if extracted_text:
            detected_lang = detect_language(extracted_text)
            if detected_lang != 'en':
                translated_text = translate_text(extracted_text, src_lang=detected_lang)
            else:
                translated_text = extracted_text
        # else: OCR found nothing, so there is no language to detect or translate.

    chat_log.append({
        "sender": user,
        "image_path": filepath,
        "translated_text": translated_text,
        "text_message": text_message,
    })

    uploader.value.clear()
    update_chat()

# Handle text message submissions
def handle_text_message(user, text_area):
    """Append the text typed by ``user`` to the chat log and refresh the view.

    Whitespace-only input is ignored. After a successful send the text area
    is emptied.
    """
    body = text_area.value.strip()
    if not body:
        return

    entry = {"sender": user, "image_path": None, "translated_text": None, "text_message": body}
    chat_log.append(entry)
    # Reset the input box once the message is logged.
    text_area.value = ""
    update_chat()


In [10]:
import ipywidgets as widgets
from IPython.display import display, clear_output, HTML
import os

# Create an output widget to display the chat content
output = widgets.Output()

# Initialize chat_log to keep track of messages (for both text and images)
chat_log = []

# Helper function to format and display messages (both text and images)
def format_message(sender, text_message, image_path, translated_text, receiver, is_for_user_b=False):
    """Build the HTML for one chat entry as seen in one user's panel.

    Args:
        sender: Id of the sending user ("A" or "B"); selects bubble colour and side.
        text_message: Message text, or None/"" when the entry has no text.
        image_path: Value used as the ``<img>`` source, or None.
        translated_text: Translation of the image text, or None.
        receiver: Id of the receiving user; the translation is rendered only
            when sender and receiver are the two different users A and B.
        is_for_user_b: When True the left/right sides are mirrored (User B's panel).

    Returns:
        An HTML fragment for the entry (empty when the entry has no content).
    """
    from html import escape  # stdlib; imported locally so the cell's import lines are untouched

    if sender == "A":
        sender_side = "right"  # User A's messages on the right side of User A's chat
        receiver_side = "left"  # User A's received messages on the left side of User B's chat
        bubble_color = "#DCF8C6"  # Light greenish for User A
    else:
        sender_side = "left"  # User B's messages on the left side of User A's chat
        receiver_side = "right"  # User B's received messages on the right side of User A's chat
        bubble_color = "#E5E5EA"  # Light grey for User B

    # For User B, we swap sides of the messages
    if is_for_user_b:
        sender_side, receiver_side = receiver_side, sender_side

    message_html = ""

    # Only draw a text bubble when there is text: image uploads are logged
    # with text_message=None, which would otherwise render as the word "None".
    # The text is escaped so characters such as < or & display literally.
    if text_message:
        message_html += f"""
    <div style="display: flex; justify-content: {sender_side}; margin-bottom: 10px;">
        <div style="max-width: 70%; word-wrap: break-word; background-color: {bubble_color}; padding: 10px; border-radius: 10px;">
            <p style="margin: 0; font-size: 14px;">{escape(str(text_message))}</p>
        </div>
    </div>
    """

    # Add image if available
    if image_path:
        message_html += f"""
        <div style="display: flex; justify-content: {sender_side}; margin-bottom: 10px;">
            <img src="{escape(str(image_path))}" width="200px" style="border-radius: 10px;">
        </div>
        """

    # The translation bubble is identical for A->B and B->A, so one branch
    # covers both directions.
    a_to_b = sender == "A" and receiver == "B"
    b_to_a = sender == "B" and receiver == "A"
    if translated_text and (a_to_b or b_to_a):
        message_html += f"""
        <div style="display: flex; justify-content: {receiver_side}; margin-bottom: 10px;">
            <div style="max-width: 70%; word-wrap: break-word; background-color: #E1FFC7; padding: 10px; border-radius: 10px;">
                <p style="margin: 0; font-size: 14px; color: green;">{escape(str(translated_text))}</p>
            </div>
        </div>
        """

    return message_html

# Update the chat display after adding messages to the chat log
def update_chat():
    """Redraw both users' chat panels inside the ``output`` widget.

    Every ``chat_log`` entry is rendered twice: once for User A's panel and
    once, with mirrored sides, for User B's panel. The translation of an
    entry is passed only to the panel of that entry's receiver, so the sender
    does not see a translation of their own image.
    """
    with output:
        # wait=True replaces the old content only when the new HTML is ready,
        # which avoids the panel flickering empty on each message.
        clear_output(wait=True)

        # Initialize empty string to hold all chat messages
        chat_html_A = ""
        chat_html_B = ""

        # Loop through the chat log and create message bubbles
        for msg in chat_log:
            receiver = msg["receiver"]
            # User A's view: normal sides; translation only if A is the receiver.
            chat_html_A += format_message(
                msg["sender"], msg["text_message"], msg["image_path"],
                msg["translated_text"] if receiver == "A" else None,
                receiver, is_for_user_b=False,
            )
            # User B's view: mirrored sides; translation only if B is the receiver.
            chat_html_B += format_message(
                msg["sender"], msg["text_message"], msg["image_path"],
                msg["translated_text"] if receiver == "B" else None,
                receiver, is_for_user_b=True,
            )

        # Display the chat messages inside a scrollable area with partition between User A and User B
        display(HTML(f"""
        <div style="width: 100%; height: 500px; overflow-y: scroll; padding: 10px; border: 1px solid #ddd;">
            <div style="display: flex; flex-direction: row; justify-content: space-between;">
                <div style="width: 48%; padding: 10px; border-right: 1px solid #ddd;">
                    <h4>User A's Chat</h4>
                    <div style="overflow-y: auto; height: 400px; background-color: #f1f1f1; padding: 10px; border-radius: 10px; border: 1px solid #ddd;">
                        {chat_html_A}
                    </div>
                </div>
                <div style="width: 48%; padding: 10px;">
                    <h4>User B's Chat</h4>
                    <div style="overflow-y: auto; height: 400px; background-color: #f1f1f1; padding: 10px; border-radius: 10px; border: 1px solid #ddd;">
                        {chat_html_B}
                    </div>
                </div>
            </div>
        </div>
        """))

# Function to handle uploading and sending of images and text
def handle_upload(user, receiver, uploader, text_message=None):
    """Save the image uploaded by ``user``, OCR/translate it and log it once.

    Args:
        user: Sender id ("A" or "B").
        receiver: Id of the user the image is sent to.
        uploader: FileUpload widget whose ``value`` maps file names to dicts
            holding ``'metadata'`` (with ``'name'``) and ``'content'`` bytes.
        text_message: Optional text sent together with the image.

    Does nothing when no file has been uploaded. Otherwise the file is written
    under ``chat_images/``, a single entry carrying the translation is
    appended to ``chat_log`` and the chat view is refreshed.
    """
    if not uploader.value:
        return

    uploaded_file = next(iter(uploader.value.values()))
    # Keep only the last path component of the client-supplied name so an
    # upload can never be written outside chat_images (e.g. "../x.png").
    safe_name = os.path.basename(uploaded_file['metadata']['name'])
    filepath = os.path.join("chat_images", f"{user}_{safe_name}")
    with open(filepath, 'wb') as f:
        f.write(uploaded_file['content'])

    # Extract text and translate for the receiver
    extracted_text = extract_text_from_image(filepath)
    if extracted_text:
        detected_lang = detect_language(extracted_text)
        if detected_lang != 'en':
            translated_text = translate_text(extracted_text, src_lang=detected_lang)
        else:
            translated_text = extracted_text
    else:
        # OCR found nothing, so there is no language to detect or translate.
        translated_text = None

    # Log the upload exactly once. Appending one entry without and one with
    # the translation made every image show up twice in each chat panel.
    chat_log.append({
        "sender": user,
        "image_path": filepath,
        "translated_text": translated_text,
        "text_message": text_message,
        "receiver": receiver,
    })

    uploader.value.clear()
    update_chat()

# Handle text message submissions
def handle_text_message(user, receiver, text_area):
    """Log the text typed by ``user`` for ``receiver`` and refresh the chat view.

    Whitespace-only input is ignored. After a successful send the text area
    is emptied.
    """
    body = text_area.value.strip()
    if not body:
        return

    entry = {"sender": user, "image_path": None, "translated_text": None, "text_message": body, "receiver": receiver}
    chat_log.append(entry)
    # Reset the input box once the message is logged.
    text_area.value = ""
    update_chat()

# Create the upload buttons and text areas for both users
upload_A = widgets.FileUpload(accept='image/*', multiple=False)
upload_B = widgets.FileUpload(accept='image/*', multiple=False)

text_A = widgets.Textarea(placeholder="Type a message...", description="Message A:")
text_B = widgets.Textarea(placeholder="Type a message...", description="Message B:")

# Create the send buttons for both users
send_A_image = widgets.Button(description="Send Image A")
send_A_text = widgets.Button(description="Send Text A")
send_B_image = widgets.Button(description="Send Image B")
send_B_text = widgets.Button(description="Send Text B")

# Link buttons to actions
send_A_image.on_click(lambda x: handle_upload("A", "B", upload_A))
send_A_text.on_click(lambda x: handle_text_message("A", "B", text_A))
send_B_image.on_click(lambda x: handle_upload("B", "A", upload_B))
send_B_text.on_click(lambda x: handle_text_message("B", "A", text_B))

# Display the interface for uploading and sending messages
display(widgets.HBox([widgets.VBox([widgets.Label("User A"), upload_A, text_A, send_A_image, send_A_text]),
                      widgets.VBox([widgets.Label("User B"), upload_B, text_B, send_B_image, send_B_text])]))

# Display the output area for chat messages
display(output)


HBox(children=(VBox(children=(Label(value='User A'), FileUpload(value={}, accept='image/*', description='Uploa…

Output()

Chat with translation through the NLLB model

In [11]:
import ipywidgets as widgets
from IPython.display import display, HTML
from base64 import b64encode
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from langdetect import detect
import torch

# Load NLLB model
model_name = "facebook/nllb-200-distilled-600M"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Language code mappings
lang_code_map = {
    "English": "eng_Latn", "Hindi": "hin_Deva", "Gujarati": "guj_Gujr",
    "Tamil": "tam_Taml", "Bengali": "ben_Beng", "Telugu": "tel_Telu",
    "Marathi": "mar_Deva", "Kannada": "kan_Knda", "Malayalam": "mal_Mlym"
}

lang_detect_map = {
    "en": "eng_Latn", "hi": "hin_Deva", "gu": "guj_Gujr", "ta": "tam_Taml",
    "bn": "ben_Beng", "te": "tel_Telu", "mr": "mar_Deva", "kn": "kan_Knda",
    "ml": "mal_Mlym"
}

def translate_message(message, target_lang):
    """Translate ``message`` into ``target_lang`` with the loaded NLLB model.

    The source language is detected with langdetect and mapped through
    ``lang_detect_map``; ``target_lang`` is a display name looked up in
    ``lang_code_map``. Unknown values on either side fall back to
    ``"eng_Latn"``. Any failure is returned as a
    ``"[Translation Error: ...]"`` string instead of being raised.
    """
    try:
        detected = detect(message)
        source_code = lang_detect_map.get(detected, "eng_Latn")
        target_code = lang_code_map.get(target_lang, "eng_Latn")

        # The tokenizer needs the source language set before encoding.
        tokenizer.src_lang = source_code
        encoded = tokenizer(message, return_tensors="pt")
        # Forcing the first generated token selects the output language.
        bos_id = tokenizer.convert_tokens_to_ids(target_code)
        outputs = model.generate(**encoded, forced_bos_token_id=bos_id, max_length=512)
        decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)
        return decoded[0]
    except Exception as err:
        return f"[Translation Error: {err}]"

# Light style override
display(HTML('''
<style>
.chat-wrapper {
    display: flex;
    width: 100%;
    height: 100%;
}
.divider {
    width: 1px;
    background-color: #ccc;
}
.chat-box {
    height: 250px;
    overflow-y: auto;
    padding: 10px;
    border: 1px solid #ddd;
}
.message-left {
    text-align: left;
    margin: 5px 0;
}
.message-right {
    text-align: right;
    margin: 5px 0;
}
</style>
'''))

# Chat boxes
chat_a = widgets.HTML()
chat_b = widgets.HTML()

# Inputs
input_a = widgets.Textarea(placeholder='Type a message...', layout=widgets.Layout(width='95%', height='40px'))
input_b = widgets.Textarea(placeholder='Type a message...', layout=widgets.Layout(width='95%', height='40px'))

# File upload
upload_a = widgets.FileUpload(accept='image/*', multiple=True)
upload_b = widgets.FileUpload(accept='image/*', multiple=True)

# Language
langs = ["English", "Hindi", "Gujarati", "Tamil", "Bengali", "Telugu", "Marathi", "Kannada", "Malayalam"]
lang_a = widgets.Dropdown(options=langs, value='English', layout=widgets.Layout(width='120px'))
lang_b = widgets.Dropdown(options=langs, value='English', layout=widgets.Layout(width='120px'))

# Send buttons
send_a = widgets.Button(description="Send", layout=widgets.Layout(width='60px'))
send_b = widgets.Button(description="Send", layout=widgets.Layout(width='60px'))

# History
history_a = ""
history_b = ""

def image_to_html(fileinfo):
    """Return an ``<img>`` tag embedding an uploaded file as a base64 data URI.

    Args:
        fileinfo: Upload record with the raw bytes under ``'content'``. When
            a MIME type is available (under ``'metadata'['type']`` or
            ``'type'`` -- presumably filled in by the FileUpload widget;
            verify for the installed ipywidgets version) it is used for the
            data URI.

    Returns:
        HTML for a 100px-wide inline image.
    """
    content = fileinfo['content']
    # Label the data with its real image type instead of always claiming PNG.
    mime = (fileinfo.get('metadata') or {}).get('type') or fileinfo.get('type') or 'image/png'
    # Fall back to PNG for anything that is not a plain image/* token, so the
    # value cannot break out of the src attribute.
    if not (mime.startswith('image/') and all(ch.isalnum() or ch in '/+.-' for ch in mime)):
        mime = 'image/png'
    img_b64 = b64encode(content).decode('utf-8')
    return f'<img src="data:{mime};base64,{img_b64}" width="100" />'

def send_from_a(_):
    """Send User A's text and/or images to both chat panels.

    User A's panel shows the original text; User B's panel shows the text
    translated into the language selected in ``lang_b``. The input box and
    the upload widget are cleared afterwards.

    Args:
        _: The clicked Button (unused).
    """
    global history_a, history_b
    from html import escape  # stdlib; imported locally so the cell's import lines are untouched

    msg = input_a.value.strip()
    translated = translate_message(msg, lang_b.value) if msg else ""
    images = "".join(image_to_html(f) for f in upload_a.value.values())
    if msg or images:
        # Escape typed and translated text so characters such as < or & are
        # displayed literally instead of being interpreted as markup.
        history_a += f"<div class='message-right'><b>User A:</b><br>{escape(msg)}<br>{images}</div>"
        history_b += f"<div class='message-left'><b>User A:</b><br>{escape(translated)}<br>{images}</div>"
        chat_a.value = f"<div class='chat-box'>{history_a}</div>"
        chat_b.value = f"<div class='chat-box'>{history_b}</div>"
    input_a.value = ""
    upload_a.value.clear()

def send_from_b(_):
    """Send User B's text and/or images to both chat panels.

    User B's panel shows the original text; User A's panel shows the text
    translated into the language selected in ``lang_a``. The input box and
    the upload widget are cleared afterwards.

    Args:
        _: The clicked Button (unused).
    """
    global history_a, history_b
    from html import escape  # stdlib; imported locally so the cell's import lines are untouched

    msg = input_b.value.strip()
    translated = translate_message(msg, lang_a.value) if msg else ""
    images = "".join(image_to_html(f) for f in upload_b.value.values())
    if msg or images:
        # Escape typed and translated text so characters such as < or & are
        # displayed literally instead of being interpreted as markup.
        history_b += f"<div class='message-right'><b>User B:</b><br>{escape(msg)}<br>{images}</div>"
        history_a += f"<div class='message-left'><b>User B:</b><br>{escape(translated)}<br>{images}</div>"
        chat_b.value = f"<div class='chat-box'>{history_b}</div>"
        chat_a.value = f"<div class='chat-box'>{history_a}</div>"
    input_b.value = ""
    upload_b.value.clear()

send_a.on_click(send_from_a)
send_b.on_click(send_from_b)

# Panel A
panel_a = widgets.VBox([
    widgets.HTML("<b>User A</b>"),
    chat_a,
    input_a,
    widgets.HBox([
        upload_a, lang_a, send_a
    ], layout=widgets.Layout(justify_content='space-between', width='95%'))
], layout=widgets.Layout(width='100%'))

# Panel B
panel_b = widgets.VBox([
    widgets.HTML("<b>User B</b>"),
    chat_b,
    input_b,
    widgets.HBox([
        upload_b, lang_b, send_b
    ], layout=widgets.Layout(justify_content='space-between', width='95%'))
], layout=widgets.Layout(width='100%'))

# Layout
layout = widgets.HBox([
    widgets.VBox([panel_a], layout=widgets.Layout(width='50%')),
    widgets.HTML('<div class="divider"></div>'),
    widgets.VBox([panel_b], layout=widgets.Layout(width='50%'))
], layout=widgets.Layout(width='100%'))

display(layout)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


HBox(children=(VBox(children=(VBox(children=(HTML(value='<b>User A</b>'), HTML(value=''), Textarea(value='', l…

In [12]:
!apt install tesseract-ocr -y
!pip install pytesseract transformers langdetect


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
tesseract-ocr is already the newest version (4.1.1-2.1build1).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.


Chat with translation for text and images (OCR)

In [13]:
import ipywidgets as widgets
from IPython.display import display, HTML
from base64 import b64encode
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from langdetect import detect
from PIL import Image
import pytesseract
import io
import torch

# Load NLLB model
model_name = "facebook/nllb-200-distilled-600M"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Language code mappings
lang_code_map = {
    "English": "eng_Latn", "Hindi": "hin_Deva", "Gujarati": "guj_Gujr",
    "Tamil": "tam_Taml", "Bengali": "ben_Beng", "Telugu": "tel_Telu",
    "Marathi": "mar_Deva", "Kannada": "kan_Knda", "Malayalam": "mal_Mlym"
}

lang_detect_map = {
    "en": "eng_Latn", "hi": "hin_Deva", "gu": "guj_Gujr", "ta": "tam_Taml",
    "bn": "ben_Beng", "te": "tel_Telu", "mr": "mar_Deva", "kn": "kan_Knda",
    "ml": "mal_Mlym"
}

# Translation function
def translate_message(message, target_lang):
    """Translate ``message`` into ``target_lang`` with the loaded NLLB model.

    The source language is detected with langdetect and mapped through
    ``lang_detect_map``; ``target_lang`` is a display name looked up in
    ``lang_code_map``. Unknown values on either side fall back to
    ``"eng_Latn"``. Any failure is returned as a
    ``"[Translation Error: ...]"`` string instead of being raised.
    """
    try:
        detected = detect(message)
        source_code = lang_detect_map.get(detected, "eng_Latn")
        target_code = lang_code_map.get(target_lang, "eng_Latn")

        # The tokenizer needs the source language set before encoding.
        tokenizer.src_lang = source_code
        encoded = tokenizer(message, return_tensors="pt")
        # Forcing the first generated token selects the output language.
        bos_id = tokenizer.convert_tokens_to_ids(target_code)
        outputs = model.generate(**encoded, forced_bos_token_id=bos_id, max_length=512)
        decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)
        return decoded[0]
    except Exception as err:
        return f"[Translation Error: {err}]"

# OCR + Translate from image
def ocr_and_translate(image_bytes, target_lang):
    """OCR an in-memory image and translate the recognised text.

    Returns the translation of the stripped OCR text, the marker
    ``"[No text detected in image]"`` when OCR yields only whitespace, or an
    ``"[OCR Error: ...]"`` string when anything raises.
    """
    try:
        picture = Image.open(io.BytesIO(image_bytes))
        recognised = pytesseract.image_to_string(picture).strip()
        if recognised:
            return translate_message(recognised, target_lang)
        return "[No text detected in image]"
    except Exception as err:
        return f"[OCR Error: {err}]"

# Light UI styling
display(HTML('''
<style>
.chat-wrapper {
    display: flex;
    width: 100%;
    height: 100%;
}
.divider {
    width: 1px;
    background-color: #ccc;
}
.chat-box {
    height: 250px;
    overflow-y: auto;
    padding: 10px;
    border: 1px solid #ddd;
}
.message-left {
    text-align: left;
    margin: 5px 0;
}
.message-right {
    text-align: right;
    margin: 5px 0;
}
</style>
'''))

# Chat histories
chat_a = widgets.HTML()
chat_b = widgets.HTML()
history_a = ""
history_b = ""

# Inputs and Uploads
input_a = widgets.Textarea(placeholder='Type a message...', layout=widgets.Layout(width='95%', height='40px'))
input_b = widgets.Textarea(placeholder='Type a message...', layout=widgets.Layout(width='95%', height='40px'))
upload_a = widgets.FileUpload(accept='image/*', multiple=True)
upload_b = widgets.FileUpload(accept='image/*', multiple=True)

# Language selection
langs = ["English", "Hindi", "Gujarati", "Tamil", "Bengali", "Telugu", "Marathi", "Kannada", "Malayalam"]
lang_a = widgets.Dropdown(options=langs, value='English', layout=widgets.Layout(width='120px'))
lang_b = widgets.Dropdown(options=langs, value='English', layout=widgets.Layout(width='120px'))

# Buttons
send_a = widgets.Button(description="Send", layout=widgets.Layout(width='60px'))
send_b = widgets.Button(description="Send", layout=widgets.Layout(width='60px'))

# Convert image to HTML
def image_to_html(fileinfo):
    """Return an ``<img>`` tag embedding an uploaded file as a base64 data URI.

    Args:
        fileinfo: Upload record with the raw bytes under ``'content'``. When
            a MIME type is available (under ``'metadata'['type']`` or
            ``'type'`` -- presumably filled in by the FileUpload widget;
            verify for the installed ipywidgets version) it is used for the
            data URI.

    Returns:
        HTML for a 100px-wide inline image.
    """
    content = fileinfo['content']
    # Label the data with its real image type instead of always claiming PNG.
    mime = (fileinfo.get('metadata') or {}).get('type') or fileinfo.get('type') or 'image/png'
    # Fall back to PNG for anything that is not a plain image/* token, so the
    # value cannot break out of the src attribute.
    if not (mime.startswith('image/') and all(ch.isalnum() or ch in '/+.-' for ch in mime)):
        mime = 'image/png'
    img_b64 = b64encode(content).decode('utf-8')
    return f'<img src="data:{mime};base64,{img_b64}" width="100" />'

# Send from A
def send_from_a(_):
    """Send User A's text and/or images to both chat panels.

    Typed text is translated into the language selected in ``lang_b``. When
    there is no text and exactly one image was uploaded, the image is OCR'd
    and that text is translated instead. The input box and the upload widget
    are cleared afterwards.

    Args:
        _: The clicked Button (unused).
    """
    global history_a, history_b
    from html import escape  # stdlib; imported locally so the cell's import lines are untouched

    msg = input_a.value.strip()
    uploaded = list(upload_a.value.values())
    images_html = "".join(image_to_html(f) for f in uploaded)

    translated = ""
    if msg:
        translated = translate_message(msg, lang_b.value)
    elif len(uploaded) == 1:
        translated = ocr_and_translate(uploaded[0]['content'], lang_b.value)

    if msg or uploaded:
        # Escape typed and translated/OCR text so characters such as < or &
        # are displayed literally instead of being interpreted as markup.
        # User A: Original content
        history_a += f"<div class='message-right'><b>User A:</b><br>{escape(msg)}<br>{images_html}</div>"

        # User B: Image first, then translated text
        history_b += f"<div class='message-left'><b>User A:</b><br>{images_html}<br>{escape(translated)}</div>"

        chat_a.value = f"<div class='chat-box'>{history_a}</div>"
        chat_b.value = f"<div class='chat-box'>{history_b}</div>"

    input_a.value = ""
    upload_a.value.clear()
# Send from B
def send_from_b(_):
    """Send User B's text and/or images to both chat panels.

    Typed text is translated into the language selected in ``lang_a``. When
    there is no text and exactly one image was uploaded, the image is OCR'd
    and that text is translated instead. The input box and the upload widget
    are cleared afterwards.

    This is the single definition of ``send_from_b`` for this cell: the cell
    previously defined it twice, the second copy silently replacing the first
    and placing the translation before the image, unlike ``send_from_a``.

    Args:
        _: The clicked Button (unused).
    """
    global history_a, history_b
    from html import escape  # stdlib; imported locally so the cell's import lines are untouched

    msg = input_b.value.strip()
    uploaded = list(upload_b.value.values())
    images_html = "".join(image_to_html(f) for f in uploaded)

    translated = ""
    if msg:
        translated = translate_message(msg, lang_a.value)
    elif len(uploaded) == 1:
        translated = ocr_and_translate(uploaded[0]['content'], lang_a.value)

    if msg or uploaded:
        # Escape typed and translated/OCR text so characters such as < or &
        # are displayed literally instead of being interpreted as markup.
        # User B: Original content
        history_b += f"<div class='message-right'><b>User B:</b><br>{escape(msg)}<br>{images_html}</div>"

        # User A: Image first, then translated text (same order as send_from_a)
        history_a += f"<div class='message-left'><b>User B:</b><br>{images_html}<br>{escape(translated)}</div>"

        chat_b.value = f"<div class='chat-box'>{history_b}</div>"
        chat_a.value = f"<div class='chat-box'>{history_a}</div>"

    input_b.value = ""
    upload_b.value.clear()

send_a.on_click(send_from_a)
send_b.on_click(send_from_b)

# Panel A
panel_a = widgets.VBox([
    widgets.HTML("<b>User A</b>"),
    chat_a,
    input_a,
    widgets.HBox([upload_a, lang_a, send_a], layout=widgets.Layout(justify_content='space-between', width='95%'))
], layout=widgets.Layout(width='100%'))

# Panel B
panel_b = widgets.VBox([
    widgets.HTML("<b>User B</b>"),
    chat_b,
    input_b,
    widgets.HBox([upload_b, lang_b, send_b], layout=widgets.Layout(justify_content='space-between', width='95%'))
], layout=widgets.Layout(width='100%'))

# Layout
layout = widgets.HBox([
    widgets.VBox([panel_a], layout=widgets.Layout(width='50%')),
    widgets.HTML('<div class="divider"></div>'),
    widgets.VBox([panel_b], layout=widgets.Layout(width='50%'))
], layout=widgets.Layout(width='100%'))

display(layout)


HBox(children=(VBox(children=(VBox(children=(HTML(value='<b>User A</b>'), HTML(value=''), Textarea(value='', l…

Chat with translation for PDF and DOCX files

In [14]:
!pip install PyMuPDF python-docx
!pip install PyPDF2





In [15]:
import ipywidgets as widgets
from IPython.display import display, HTML
from base64 import b64encode
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from langdetect import detect
from PIL import Image
import pytesseract
import io
import torch
from PyPDF2 import PdfReader

# Load NLLB model
model_name = "facebook/nllb-200-distilled-600M"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Language code mappings
lang_code_map = {
    "English": "eng_Latn", "Hindi": "hin_Deva", "Gujarati": "guj_Gujr",
    "Tamil": "tam_Taml", "Bengali": "ben_Beng", "Telugu": "tel_Telu",
    "Marathi": "mar_Deva", "Kannada": "kan_Knda", "Malayalam": "mal_Mlym"
}

lang_detect_map = {
    "en": "eng_Latn", "hi": "hin_Deva", "gu": "guj_Gujr", "ta": "tam_Taml",
    "bn": "ben_Beng", "te": "tel_Telu", "mr": "mar_Deva", "kn": "kan_Knda",
    "ml": "mal_Mlym"
}

# Translation function
def translate_message(message, target_lang):
    """Translate ``message`` into ``target_lang`` with the loaded NLLB model.

    The source language is detected with langdetect and mapped through
    ``lang_detect_map``; ``target_lang`` is a display name looked up in
    ``lang_code_map``. Unknown values on either side fall back to
    ``"eng_Latn"``. Any failure is returned as a
    ``"[Translation Error: ...]"`` string instead of being raised.
    """
    try:
        detected = detect(message)
        source_code = lang_detect_map.get(detected, "eng_Latn")
        target_code = lang_code_map.get(target_lang, "eng_Latn")

        # The tokenizer needs the source language set before encoding.
        tokenizer.src_lang = source_code
        encoded = tokenizer(message, return_tensors="pt")
        # Forcing the first generated token selects the output language.
        bos_id = tokenizer.convert_tokens_to_ids(target_code)
        outputs = model.generate(**encoded, forced_bos_token_id=bos_id, max_length=512)
        decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)
        return decoded[0]
    except Exception as err:
        return f"[Translation Error: {err}]"

# OCR + Translate from image
def ocr_and_translate(image_bytes, target_lang):
    """OCR an in-memory image and translate the recognised text.

    Returns the translation of the stripped OCR text, the marker
    ``"[No text detected in image]"`` when OCR yields only whitespace, or an
    ``"[OCR Error: ...]"`` string when anything raises.
    """
    try:
        picture = Image.open(io.BytesIO(image_bytes))
        recognised = pytesseract.image_to_string(picture).strip()
        if recognised:
            return translate_message(recognised, target_lang)
        return "[No text detected in image]"
    except Exception as err:
        return f"[OCR Error: {err}]"

# PDF extraction function
def extract_text_from_pdf(pdf_bytes):
    """Return the text of every page of a PDF given as raw bytes.

    Args:
        pdf_bytes: Content of the uploaded PDF file.

    Returns:
        The page texts joined by newlines and stripped, or
        ``"[PDF Error: ...]"`` if the PDF cannot be read. Never raises.
    """
    try:
        reader = PdfReader(io.BytesIO(pdf_bytes))
        # Treat a None/empty page result as "" so one text-less page does not
        # turn the whole document into an error; join with newlines so words at
        # page boundaries are not fused together.
        page_texts = [page.extract_text() or "" for page in reader.pages]
        return "\n".join(page_texts).strip()
    except Exception as e:
        return f"[PDF Error: {str(e)}]"

# Light UI styling
# Emits a <style> block into the cell output. `chat-box`, `message-left` and
# `message-right` are the classes used by the send handlers; `divider` is used by the layout.
# NOTE(review): `.chat-wrapper` is not referenced by any HTML built in this cell.
display(HTML('''
<style>
.chat-wrapper {
    display: flex;
    width: 100%;
    height: 100%;
}
.divider {
    width: 1px;
    background-color: #ccc;
}
.chat-box {
    height: 250px;
    overflow-y: auto;
    padding: 10px;
    border: 1px solid #ddd;
}
.message-left {
    text-align: left;
    margin: 5px 0;
}
.message-right {
    text-align: right;
    margin: 5px 0;
}
</style>
'''))

# Chat histories
# chat_a / chat_b render each user's pane; history_a / history_b accumulate the HTML of
# every message and are re-rendered into the widgets by the send handlers.
chat_a = widgets.HTML()
chat_b = widgets.HTML()
history_a = ""
history_b = ""

# Inputs and Uploads
# One text box plus separate image and PDF upload widgets per user.
input_a = widgets.Textarea(placeholder='Type a message...', layout=widgets.Layout(width='95%', height='40px'))
input_b = widgets.Textarea(placeholder='Type a message...', layout=widgets.Layout(width='95%', height='40px'))
upload_a_img = widgets.FileUpload(accept='image/*', multiple=True)
upload_b_img = widgets.FileUpload(accept='image/*', multiple=True)
upload_a_pdf = widgets.FileUpload(accept='.pdf', multiple=True)
upload_b_pdf = widgets.FileUpload(accept='.pdf', multiple=True)

# Language selection
# lang_a is the language messages sent *to* user A are translated into (and lang_b for user B).
langs = ["English", "Hindi", "Gujarati", "Tamil", "Bengali", "Telugu", "Marathi", "Kannada", "Malayalam"]
lang_a = widgets.Dropdown(options=langs, value='English', layout=widgets.Layout(width='120px'))
lang_b = widgets.Dropdown(options=langs, value='English', layout=widgets.Layout(width='120px'))

# Buttons
send_a = widgets.Button(description="Send", layout=widgets.Layout(width='60px'))
send_b = widgets.Button(description="Send", layout=widgets.Layout(width='60px'))

# Convert image to HTML
def image_to_html(fileinfo):
    """Render an uploaded image as an inline, 100px-wide base64 ``<img>`` tag.

    Args:
        fileinfo: Upload record (dict). ``fileinfo['content']`` holds the raw
            bytes; the MIME type is taken from ``fileinfo['metadata']['type']``
            or ``fileinfo['type']`` when present.

    Returns:
        HTML for an ``<img>`` whose ``src`` is a data URI. Falls back to
        ``image/png`` when no image MIME type is available.
    """
    import html  # local import keeps this helper self-contained

    content = fileinfo['content']
    meta = fileinfo.get('metadata')
    if not isinstance(meta, dict):
        meta = {}
    mime = meta.get('type') or fileinfo.get('type') or 'image/png'
    # Only trust image/* types; anything else keeps the previous default.
    if not str(mime).startswith('image/'):
        mime = 'image/png'
    img_b64 = b64encode(content).decode('utf-8')
    return f'<img src="data:{html.escape(str(mime), quote=True)};base64,{img_b64}" width="100" />'

# Convert PDF to HTML (display only the file name)
def pdf_to_html(fileinfo):
    """Return a bold HTML label carrying the uploaded PDF's file name.

    Args:
        fileinfo: Upload record; the name is read from
            ``fileinfo['metadata']['name']`` when a 'metadata' key exists.

    Returns:
        ``<div><b>NAME</b></div>`` with NAME HTML-escaped, or "Unknown PDF"
        when the record has no metadata.
    """
    import html  # local import keeps this helper self-contained

    pdf_name = fileinfo['metadata']['name'] if 'metadata' in fileinfo else "Unknown PDF"
    # The file name is user-controlled; escape it before it reaches widgets.HTML.
    return f'<div><b>{html.escape(str(pdf_name))}</b></div>'


# Shared send pipeline used by both Send buttons
def _relay_message(sender, text_input, img_upload, pdf_upload, target_lang):
    """Post one message from ``sender`` ('A' or 'B') into both chat panes.

    The sender's pane shows the original text plus attachments; the other pane
    shows the attachments followed by the translation into ``target_lang``.
    Typed and translated text is HTML-escaped before it is rendered, and every
    attached image is OCR-translated (not only a lone one), so the receiver
    never gets an empty translation for a multi-image message.

    Args:
        sender: "A" or "B"; selects which history is the sender's own.
        text_input: Text widget holding the typed message.
        img_upload: Upload widget for images; its ``value`` is used as a dict.
        pdf_upload: Upload widget for PDFs; its ``value`` is used as a dict.
        target_lang: Display name of the receiver's language.
    """
    import html  # local import keeps this cell self-contained

    global history_a, history_b

    msg = text_input.value.strip()
    uploaded_img = list(img_upload.value.values())
    uploaded_pdf = list(pdf_upload.value.values())

    if msg or uploaded_img or uploaded_pdf:
        images_html = "".join(image_to_html(f) for f in uploaded_img)
        pdf_html = "".join(pdf_to_html(f) for f in uploaded_pdf)

        translations = []
        if msg:
            translations.append(translate_message(msg, target_lang))
        for f in uploaded_img:
            translations.append(ocr_and_translate(f['content'], target_lang))
        translated_html = "<br>".join(html.escape(str(t)) for t in translations)

        # Sender's pane: original content
        own_entry = (
            f"<div class='message-right'><b>User {sender}:</b><br>"
            f"{html.escape(msg)}<br>{images_html}{pdf_html}</div>"
        )
        # Receiver's pane: attachments first, then translated text
        peer_entry = (
            f"<div class='message-left'><b>User {sender}:</b><br>"
            f"{images_html}{pdf_html}<br>{translated_html}</div>"
        )
        if sender == "A":
            history_a += own_entry
            history_b += peer_entry
        else:
            history_b += own_entry
            history_a += peer_entry

        chat_a.value = f"<div class='chat-box'>{history_a}</div>"
        chat_b.value = f"<div class='chat-box'>{history_b}</div>"

    # Reset the sender's inputs whether or not anything was sent.
    text_input.value = ""
    img_upload.value.clear()
    pdf_upload.value.clear()


def send_from_a(_):
    """Click handler for user A's Send button (translates into B's language)."""
    _relay_message("A", input_a, upload_a_img, upload_a_pdf, lang_b.value)


def send_from_b(_):
    """Click handler for user B's Send button (translates into A's language)."""
    _relay_message("B", input_b, upload_b_img, upload_b_pdf, lang_a.value)


# Wire each Send button to its handler.
send_a.on_click(send_from_a)
send_b.on_click(send_from_b)

# Panel A
# Stacked top to bottom: title, chat history, text input, then one row holding the
# image upload, PDF upload, language dropdown and Send button.
panel_a = widgets.VBox([
    widgets.HTML("<b>User A</b>"),
    chat_a,
    input_a,
    widgets.HBox([upload_a_img, upload_a_pdf, lang_a, send_a], layout=widgets.Layout(justify_content='space-between', width='95%'))
], layout=widgets.Layout(width='100%'))

# Panel B
# Same structure as panel A, built from user B's widgets.
panel_b = widgets.VBox([
    widgets.HTML("<b>User B</b>"),
    chat_b,
    input_b,
    widgets.HBox([upload_b_img, upload_b_pdf, lang_b, send_b], layout=widgets.Layout(justify_content='space-between', width='95%'))
], layout=widgets.Layout(width='100%'))

# Layout
# Two half-width columns (A | B) with the `.divider` element between them.
layout = widgets.HBox([
    widgets.VBox([panel_a], layout=widgets.Layout(width='50%')),
    widgets.HTML('<div class="divider"></div>'),
    widgets.VBox([panel_b], layout=widgets.Layout(width='50%'))
], layout=widgets.Layout(width='100%'))

display(layout)


HBox(children=(VBox(children=(VBox(children=(HTML(value='<b>User A</b>'), HTML(value=''), Textarea(value='', l…

PDF translation

In [16]:
pip install nltk




In [17]:
# Download NLTK's "punkt" data package.
# NOTE(review): no cell visible in this part of the notebook calls nltk — confirm it is still needed.
import nltk
nltk.download('punkt')


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [18]:
import ipywidgets as widgets
from IPython.display import display, HTML
from base64 import b64encode
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from langdetect import detect
from PIL import Image
import pytesseract
import io
import torch
from PyPDF2 import PdfReader

# Load NLLB model
# `tokenizer` and `model` are module-level globals read by translate_message() in this cell.
# NOTE(review): this repeats the checkpoint load done by the previous chat cell.
model_name = "facebook/nllb-200-distilled-600M"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Language code mappings
# Dropdown display name -> language code handed to the tokenizer/model as the target.
lang_code_map = {
    "English": "eng_Latn", "Hindi": "hin_Deva", "Gujarati": "guj_Gujr",
    "Tamil": "tam_Taml", "Bengali": "ben_Beng", "Telugu": "tel_Telu",
    "Marathi": "mar_Deva", "Kannada": "kan_Knda", "Malayalam": "mal_Mlym"
}

# Code returned by langdetect's detect() -> the same model language codes (used as the source).
# translate_message() falls back to "eng_Latn" for any detected code missing from this table.
lang_detect_map = {
    "en": "eng_Latn", "hi": "hin_Deva", "gu": "guj_Gujr", "ta": "tam_Taml",
    "bn": "ben_Beng", "te": "tel_Telu", "mr": "mar_Deva", "kn": "kan_Knda",
    "ml": "mal_Mlym"
}

# Translation function
def translate_message(message, target_lang):
    """Translate ``message`` into ``target_lang`` with the module-level NLLB model.

    Args:
        message: Text to translate; its source language is auto-detected.
        target_lang: Display name looked up in ``lang_code_map``; unknown names
            fall back to English ("eng_Latn").

    Returns:
        The translated string, ``"[No text to translate]"`` for blank input
        (e.g. a PDF without extractable text), or ``"[Translation Error: ...]"``
        if any step fails. Never raises.
    """
    try:
        if not message or not message.strip():
            return "[No text to translate]"

        # Language detection can fail on text without usable features (digits,
        # emoji, punctuation); treat such input as English instead of aborting.
        try:
            source_lang_code = detect(message)
        except Exception:
            source_lang_code = "en"

        src_lang = lang_detect_map.get(source_lang_code, "eng_Latn")
        tgt_lang = lang_code_map.get(target_lang, "eng_Latn")

        tokenizer.src_lang = src_lang
        inputs = tokenizer(message, return_tensors="pt")
        generated_tokens = model.generate(
            **inputs,
            # Forcing the first generated token to the target code selects the output language.
            forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_lang),
            max_length=512
        )
        return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
    except Exception as e:
        return f"[Translation Error: {str(e)}]"

# OCR + Translate from image
def ocr_and_translate(image_bytes, target_lang):
    """OCR the image held in ``image_bytes`` and translate the recognised text.

    Returns the translation, ``"[No text detected in image]"`` when OCR yields
    only whitespace, or ``"[OCR Error: ...]"`` if opening/recognising fails.
    """
    try:
        picture = Image.open(io.BytesIO(image_bytes))
        recognized = pytesseract.image_to_string(picture).strip()
        if recognized:
            return translate_message(recognized, target_lang)
        return "[No text detected in image]"
    except Exception as err:
        return f"[OCR Error: {err}]"

# PDF extraction function
def extract_text_from_pdf(pdf_bytes):
    """Return the text of every page of a PDF given as raw bytes.

    Args:
        pdf_bytes: Content of the uploaded PDF file.

    Returns:
        The page texts joined by newlines and stripped, or
        ``"[PDF Error: ...]"`` if the PDF cannot be read. Never raises.
    """
    try:
        reader = PdfReader(io.BytesIO(pdf_bytes))
        # Treat a None/empty page result as "" so one text-less page does not
        # turn the whole document into an error; join with newlines so words at
        # page boundaries are not fused together.
        page_texts = [page.extract_text() or "" for page in reader.pages]
        return "\n".join(page_texts).strip()
    except Exception as e:
        return f"[PDF Error: {str(e)}]"

# Light UI styling
# Emits a <style> block into the cell output. `chat-box`, `message-left` and
# `message-right` are the classes used by the send handlers; `divider` is used by the layout.
# NOTE(review): `.chat-wrapper` is not referenced by any HTML built in this cell.
display(HTML('''
<style>
.chat-wrapper {
    display: flex;
    width: 100%;
    height: 100%;
}
.divider {
    width: 1px;
    background-color: #ccc;
}
.chat-box {
    height: 250px;
    overflow-y: auto;
    padding: 10px;
    border: 1px solid #ddd;
}
.message-left {
    text-align: left;
    margin: 5px 0;
}
.message-right {
    text-align: right;
    margin: 5px 0;
}
</style>
'''))

# Chat histories
# chat_a / chat_b render each user's pane; history_a / history_b accumulate the HTML of
# every message and are re-rendered into the widgets by the send handlers.
chat_a = widgets.HTML()
chat_b = widgets.HTML()
history_a = ""
history_b = ""

# Inputs and Uploads
# One text box plus separate image and PDF upload widgets per user.
input_a = widgets.Textarea(placeholder='Type a message...', layout=widgets.Layout(width='95%', height='40px'))
input_b = widgets.Textarea(placeholder='Type a message...', layout=widgets.Layout(width='95%', height='40px'))
upload_a_img = widgets.FileUpload(accept='image/*', multiple=True)
upload_b_img = widgets.FileUpload(accept='image/*', multiple=True)
upload_a_pdf = widgets.FileUpload(accept='.pdf', multiple=True)
upload_b_pdf = widgets.FileUpload(accept='.pdf', multiple=True)

# Language selection
# lang_a is the language messages sent *to* user A are translated into (and lang_b for user B).
langs = ["English", "Hindi", "Gujarati", "Tamil", "Bengali", "Telugu", "Marathi", "Kannada", "Malayalam"]
lang_a = widgets.Dropdown(options=langs, value='English', layout=widgets.Layout(width='120px'))
lang_b = widgets.Dropdown(options=langs, value='English', layout=widgets.Layout(width='120px'))

# Buttons
send_a = widgets.Button(description="Send", layout=widgets.Layout(width='60px'))
send_b = widgets.Button(description="Send", layout=widgets.Layout(width='60px'))

# Convert image to HTML
def image_to_html(fileinfo):
    """Render an uploaded image as an inline, 100px-wide base64 ``<img>`` tag.

    Args:
        fileinfo: Upload record (dict). ``fileinfo['content']`` holds the raw
            bytes; the MIME type is taken from ``fileinfo['metadata']['type']``
            or ``fileinfo['type']`` when present.

    Returns:
        HTML for an ``<img>`` whose ``src`` is a data URI. Falls back to
        ``image/png`` when no image MIME type is available.
    """
    import html  # local import keeps this helper self-contained

    content = fileinfo['content']
    meta = fileinfo.get('metadata')
    if not isinstance(meta, dict):
        meta = {}
    mime = meta.get('type') or fileinfo.get('type') or 'image/png'
    # Only trust image/* types; anything else keeps the previous default.
    if not str(mime).startswith('image/'):
        mime = 'image/png'
    img_b64 = b64encode(content).decode('utf-8')
    return f'<img src="data:{html.escape(str(mime), quote=True)};base64,{img_b64}" width="100" />'

# Convert PDF to HTML (display only the file name)
def pdf_to_html(fileinfo):
    """Return a bold HTML label carrying the uploaded PDF's file name.

    Args:
        fileinfo: Upload record; the name is read from
            ``fileinfo['metadata']['name']`` when a 'metadata' key exists.

    Returns:
        ``<div><b>NAME</b></div>`` with NAME HTML-escaped, or "Unknown PDF"
        when the record has no metadata.
    """
    import html  # local import keeps this helper self-contained

    pdf_name = fileinfo['metadata']['name'] if 'metadata' in fileinfo else "Unknown PDF"
    # The file name is user-controlled; escape it before it reaches widgets.HTML.
    return f'<div><b>{html.escape(str(pdf_name))}</b></div>'


# Shared send pipeline used by both Send buttons
def _relay_message(sender, text_input, img_upload, pdf_upload, target_lang):
    """Post one message from ``sender`` ('A' or 'B') into both chat panes.

    The sender's pane shows the original text plus attachments; the other pane
    shows only translated text. Typed and translated text is HTML-escaped, and
    everything that was sent (text, every image, every PDF) is translated, so
    the receiver never gets an empty bubble for multi-attachment messages.

    Args:
        sender: "A" or "B"; selects which history is the sender's own.
        text_input: Text widget holding the typed message.
        img_upload: Upload widget for images; its ``value`` is used as a dict.
        pdf_upload: Upload widget for PDFs; its ``value`` is used as a dict.
        target_lang: Display name of the receiver's language.
    """
    import html  # local import keeps this cell self-contained

    global history_a, history_b

    msg = text_input.value.strip()
    uploaded_img = list(img_upload.value.values())
    uploaded_pdf = list(pdf_upload.value.values())

    if msg or uploaded_img or uploaded_pdf:
        images_html = "".join(image_to_html(f) for f in uploaded_img)
        pdf_html = "".join(pdf_to_html(f) for f in uploaded_pdf)

        translations = []
        if msg:
            translations.append(translate_message(msg, target_lang))
        for f in uploaded_img:
            translations.append(ocr_and_translate(f['content'], target_lang))
        for f in uploaded_pdf:
            pdf_text = extract_text_from_pdf(f['content'])
            if not pdf_text:
                translations.append("[No text found in PDF]")
            elif pdf_text.startswith("[PDF Error:"):
                # Show the extractor's error marker as-is instead of translating it.
                translations.append(pdf_text)
            else:
                translations.append(translate_message(pdf_text, target_lang))
        translated_html = "<br>".join(html.escape(str(t)) for t in translations)

        # Sender's pane: original content
        own_entry = (
            f"<div class='message-right'><b>User {sender}:</b><br>"
            f"{html.escape(msg)}<br>{images_html}{pdf_html}</div>"
        )
        # Receiver's pane: translated text only
        peer_entry = f"<div class='message-left'><b>User {sender}:</b><br>{translated_html}</div>"
        if sender == "A":
            history_a += own_entry
            history_b += peer_entry
        else:
            history_b += own_entry
            history_a += peer_entry

        chat_a.value = f"<div class='chat-box'>{history_a}</div>"
        chat_b.value = f"<div class='chat-box'>{history_b}</div>"

    # Reset the sender's inputs whether or not anything was sent.
    text_input.value = ""
    img_upload.value.clear()
    pdf_upload.value.clear()


def send_from_a(_):
    """Click handler for user A's Send button (translates into B's language)."""
    _relay_message("A", input_a, upload_a_img, upload_a_pdf, lang_b.value)


def send_from_b(_):
    """Click handler for user B's Send button (translates into A's language)."""
    _relay_message("B", input_b, upload_b_img, upload_b_pdf, lang_a.value)


# Wire each Send button to its handler.
send_a.on_click(send_from_a)
send_b.on_click(send_from_b)

# Panel A
# Stacked top to bottom: title, chat history, text input, then one row holding the
# image upload, PDF upload, language dropdown and Send button.
panel_a = widgets.VBox([
    widgets.HTML("<b>User A</b>"),
    chat_a,
    input_a,
    widgets.HBox([upload_a_img, upload_a_pdf, lang_a, send_a], layout=widgets.Layout(justify_content='space-between', width='95%'))
], layout=widgets.Layout(width='100%'))

# Panel B
# Same structure as panel A, built from user B's widgets.
panel_b = widgets.VBox([
    widgets.HTML("<b>User B</b>"),
    chat_b,
    input_b,
    widgets.HBox([upload_b_img, upload_b_pdf, lang_b, send_b], layout=widgets.Layout(justify_content='space-between', width='95%'))
], layout=widgets.Layout(width='100%'))

# Layout
# Two half-width columns (A | B) with the `.divider` element between them.
layout = widgets.HBox([
    widgets.VBox([panel_a], layout=widgets.Layout(width='50%')),
    widgets.HTML('<div class="divider"></div>'),
    widgets.VBox([panel_b], layout=widgets.Layout(width='50%'))
], layout=widgets.Layout(width='100%'))

display(layout)


HBox(children=(VBox(children=(VBox(children=(HTML(value='<b>User A</b>'), HTML(value=''), Textarea(value='', l…

In [19]:
pip install pymupdf




Final code for text, image, and PDF translation using Facebook's NLLB model (no API)

In [20]:
import ipywidgets as widgets
from IPython.display import display, HTML
from base64 import b64encode
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from langdetect import detect
from PIL import Image
import pytesseract
import io
import torch
from PyPDF2 import PdfReader
import re

# Load NLLB model
# `tokenizer` and `model` are module-level globals read by translate_message() in this cell.
# NOTE(review): this repeats the checkpoint load done by the earlier chat cells.
model_name = "facebook/nllb-200-distilled-600M"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Language code mappings
# Dropdown display name -> language code handed to the tokenizer/model as the target.
lang_code_map = {
    "English": "eng_Latn", "Hindi": "hin_Deva", "Gujarati": "guj_Gujr",
    "Tamil": "tam_Taml", "Bengali": "ben_Beng", "Telugu": "tel_Telu",
    "Marathi": "mar_Deva", "Kannada": "kan_Knda", "Malayalam": "mal_Mlym"
}

# Code returned by langdetect's detect() -> the same model language codes (used as the source).
# translate_message() falls back to "eng_Latn" for any detected code missing from this table.
lang_detect_map = {
    "en": "eng_Latn", "hi": "hin_Deva", "gu": "guj_Gujr", "ta": "tam_Taml",
    "bn": "ben_Beng", "te": "tel_Telu", "mr": "mar_Deva", "kn": "kan_Knda",
    "ml": "mal_Mlym"
}

def _safe_detect(text: str) -> str:
    """Return the language code detected for ``text``, or "en" if detection fails."""
    try:
        code = detect(text)
    except Exception:
        code = "en"
    return code

def _chunk_text(s: str, max_chars: int = 1200):
    """Split ``s`` into whitespace-normalised chunks of at most ``max_chars`` characters.

    Chunks break at sentence ends where possible. Besides ``. ! ?`` the
    Devanagari danda / double danda and the Arabic question mark / full stop
    count as sentence ends, so Indic and Arabic text is actually split. A
    sentence longer than ``max_chars`` is broken at word boundaries, and a
    single over-long word is hard-sliced, so no chunk exceeds the limit.

    Args:
        s: Text to split.
        max_chars: Maximum chunk length in characters (values below 1 are treated as 1).

    Returns:
        A list of chunk strings; ``[s]`` (normalised) when it already fits.
    """
    max_chars = max(1, max_chars)
    s = re.sub(r'\s+', ' ', s).strip()
    if len(s) <= max_chars:
        return [s]

    # 1) Cut into pieces that each fit the limit on their own.
    pieces = []
    for sentence in re.split(r'(?<=[.!?\u0964\u0965\u061F\u06D4])\s+', s):
        if len(sentence) <= max_chars:
            pieces.append(sentence)
            continue
        for word in sentence.split(' '):
            pieces.extend(word[i:i + max_chars] for i in range(0, len(word), max_chars))

    # 2) Greedily pack pieces; `total` is the exact length of ' '.join(buf).
    chunks, buf, total = [], [], 0
    for piece in pieces:
        needed = len(piece) + (1 if buf else 0)
        if buf and total + needed > max_chars:
            chunks.append(' '.join(buf))
            buf, total = [piece], len(piece)
        else:
            buf.append(piece)
            total += needed
    if buf:
        chunks.append(' '.join(buf))
    return chunks

# Translation function (chunk-safe + graceful detection)
def translate_message(message, target_lang):
    """Translate ``message`` into ``target_lang``, chunk by chunk.

    The source language is auto-detected (English on failure); the text is cut
    by ``_chunk_text`` and each chunk is translated separately, then the
    results are joined with spaces.

    Returns:
        The translation, ``"[No text to translate]"`` for blank input, or
        ``"[Translation Error: ...]"`` if any step fails.
    """
    try:
        if not (message and message.strip()):
            return "[No text to translate]"

        detected = _safe_detect(message)
        source_code = lang_detect_map.get(detected, "eng_Latn")
        target_code = lang_code_map.get(target_lang, "eng_Latn")
        tokenizer.src_lang = source_code

        pieces = []
        for segment in _chunk_text(message, max_chars=1200):
            encoded = tokenizer(segment, return_tensors="pt", truncation=True, max_length=900)
            output_ids = model.generate(
                **encoded,
                forced_bos_token_id=tokenizer.convert_tokens_to_ids(target_code),
                max_length=512
            )
            decoded = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
            pieces.append(decoded[0])
        return " ".join(pieces).strip()
    except Exception as err:
        return f"[Translation Error: {err}]"

# OCR + Translate from image
def ocr_and_translate(image_bytes, target_lang):
    """OCR the image held in ``image_bytes`` and translate the recognised text.

    Returns the translation, ``"[No text detected in image]"`` when OCR yields
    nothing but whitespace, or ``"[OCR Error: ...]"`` if opening/recognising fails.
    """
    try:
        picture = Image.open(io.BytesIO(image_bytes))
        raw = pytesseract.image_to_string(picture)
        recognized = (raw or "").strip()
        if recognized:
            return translate_message(recognized, target_lang)
        return "[No text detected in image]"
    except Exception as err:
        return f"[OCR Error: {err}]"

# PDF extraction with OCR fallback (handles scanned PDFs)
def extract_text_from_pdf(pdf_bytes):
    """Extract text from a PDF given as bytes, falling back to OCR.

    First tries the selectable text layer via PyPDF2. If that yields nothing
    (or fails), every page is rasterised with PyMuPDF and run through Tesseract.

    Args:
        pdf_bytes: Content of the uploaded PDF file.

    Returns:
        The extracted text (pages joined by newlines, stripped), or ``""`` when
        neither route produces text or the OCR route is unavailable. Never raises.
    """
    # 1) Try selectable text via PyPDF2
    try:
        reader = PdfReader(io.BytesIO(pdf_bytes))
        text = "\n".join((page.extract_text() or "") for page in reader.pages).strip()
        if text:
            return text
    except Exception:
        # Deliberate best-effort: any PyPDF2 failure falls through to the OCR route.
        pass

    # 2) Fallback: OCR each page via PyMuPDF -> image -> Tesseract
    try:
        import fitz  # PyMuPDF (optional dependency, imported lazily)
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        try:
            ocr_text = []
            for pno in range(len(doc)):
                page = doc[pno]
                pix = page.get_pixmap(dpi=200)  # rasterize
                img = Image.open(io.BytesIO(pix.tobytes("png")))
                ocr_text.append(pytesseract.image_to_string(img))
        finally:
            # Release the document even when a page fails to render or OCR.
            doc.close()
        return "\n".join(ocr_text).strip()
    except Exception:
        # If PyMuPDF not installed or OCR fails
        return ""

# Light UI styling
# Emits a <style> block into the cell output. `chat-box`, `message-left` and
# `message-right` are the classes used by the send handlers; `divider` is used by the layout.
# NOTE(review): `.chat-wrapper` is not referenced by any HTML built in this cell.
display(HTML('''
<style>
.chat-wrapper {
    display: flex;
    width: 100%;
    height: 100%;
}
.divider {
    width: 1px;
    background-color: #ccc;
}
.chat-box {
    height: 250px;
    overflow-y: auto;
    padding: 10px;
    border: 1px solid #ddd;
}
.message-left {
    text-align: left;
    margin: 5px 0;
}
.message-right {
    text-align: right;
    margin: 5px 0;
}
</style>
'''))

# Chat histories
# chat_a / chat_b render each user's pane; history_a / history_b accumulate the HTML of
# every message and are re-rendered into the widgets by the send handlers.
chat_a = widgets.HTML()
chat_b = widgets.HTML()
history_a = ""
history_b = ""

# Inputs and Uploads (labels set to "image" and "pdf")
# One text box plus separate image and PDF upload widgets per user.
input_a = widgets.Textarea(placeholder='Type a message...', layout=widgets.Layout(width='95%', height='40px'))
input_b = widgets.Textarea(placeholder='Type a message...', layout=widgets.Layout(width='95%', height='40px'))
upload_a_img = widgets.FileUpload(accept='image/*', multiple=True, description='image')
upload_b_img = widgets.FileUpload(accept='image/*', multiple=True, description='image')
upload_a_pdf = widgets.FileUpload(accept='.pdf', multiple=True, description='pdf')
upload_b_pdf = widgets.FileUpload(accept='.pdf', multiple=True, description='pdf')

# Language selection
# lang_a is the language messages sent *to* user A are translated into (and lang_b for user B).
langs = ["English", "Hindi", "Gujarati", "Tamil", "Bengali", "Telugu", "Marathi", "Kannada", "Malayalam"]
lang_a = widgets.Dropdown(options=langs, value='English', layout=widgets.Layout(width='120px'))
lang_b = widgets.Dropdown(options=langs, value='English', layout=widgets.Layout(width='120px'))

# Buttons
send_a = widgets.Button(description="Send", layout=widgets.Layout(width='60px'))
send_b = widgets.Button(description="Send", layout=widgets.Layout(width='60px'))

# Convert image to HTML
def image_to_html(fileinfo):
    """Render an uploaded image as an inline, 100px-wide base64 ``<img>`` tag.

    Args:
        fileinfo: Upload record (dict). ``fileinfo['content']`` holds the raw
            bytes; the MIME type is taken from ``fileinfo['metadata']['type']``
            or ``fileinfo['type']`` when present.

    Returns:
        HTML for an ``<img>`` whose ``src`` is a data URI. Falls back to
        ``image/png`` when no image MIME type is available.
    """
    import html  # local import keeps this helper self-contained

    content = fileinfo['content']
    meta = fileinfo.get('metadata')
    if not isinstance(meta, dict):
        meta = {}
    mime = meta.get('type') or fileinfo.get('type') or 'image/png'
    # Only trust image/* types; anything else keeps the previous default.
    if not str(mime).startswith('image/'):
        mime = 'image/png'
    img_b64 = b64encode(content).decode('utf-8')
    return f'<img src="data:{html.escape(str(mime), quote=True)};base64,{img_b64}" width="100" />'

# Convert PDF to HTML (display only the file name)
def pdf_to_html(fileinfo):
    """Return a bold HTML label carrying the uploaded PDF's file name.

    Args:
        fileinfo: Upload record; the name is read from
            ``fileinfo['metadata']['name']`` when a 'metadata' key exists.

    Returns:
        ``<div><b>NAME</b></div>`` with NAME HTML-escaped, or "Unknown PDF"
        when the record has no metadata.
    """
    import html  # local import keeps this helper self-contained

    pdf_name = fileinfo['metadata']['name'] if 'metadata' in fileinfo else "Unknown PDF"
    # The file name is user-controlled; escape it before it reaches widgets.HTML.
    return f'<div><b>{html.escape(str(pdf_name))}</b></div>'

# Shared send pipeline used by both Send buttons
def _relay_message(sender, text_input, img_upload, pdf_upload, target_lang):
    """Post one message from ``sender`` ('A' or 'B') into both chat panes.

    The sender's pane shows the original text plus attachments (image shown,
    PDF name shown); the other pane shows only translated text. Typed and
    translated text is HTML-escaped, and everything that was sent (text, every
    image, every PDF) is translated, so the receiver never gets an empty bubble
    for multi-attachment messages.

    Args:
        sender: "A" or "B"; selects which history is the sender's own.
        text_input: Text widget holding the typed message.
        img_upload: Upload widget for images; its ``value`` is used as a dict.
        pdf_upload: Upload widget for PDFs; its ``value`` is used as a dict.
        target_lang: Display name of the receiver's language.
    """
    import html  # local import keeps this cell self-contained

    global history_a, history_b

    msg = text_input.value.strip()
    uploaded_img = list(img_upload.value.values())
    uploaded_pdf = list(pdf_upload.value.values())

    if msg or uploaded_img or uploaded_pdf:
        images_html = "".join(image_to_html(f) for f in uploaded_img)
        pdf_html = "".join(pdf_to_html(f) for f in uploaded_pdf)

        translations = []
        if msg:
            translations.append(translate_message(msg, target_lang))
        for f in uploaded_img:
            translations.append(ocr_and_translate(f['content'], target_lang))
        for f in uploaded_pdf:
            pdf_text = extract_text_from_pdf(f['content'])
            translations.append(
                translate_message(pdf_text, target_lang) if pdf_text else "[No text found in PDF]"
            )
        translated_html = "<br>".join(html.escape(str(t)) for t in translations)

        # Sender's pane: original content
        own_entry = (
            f"<div class='message-right'><b>User {sender}:</b><br>"
            f"{html.escape(msg)}<br>{images_html}{pdf_html}</div>"
        )
        # Receiver's pane: translated text only
        peer_entry = f"<div class='message-left'><b>User {sender}:</b><br>{translated_html}</div>"
        if sender == "A":
            history_a += own_entry
            history_b += peer_entry
        else:
            history_b += own_entry
            history_a += peer_entry

        chat_a.value = f"<div class='chat-box'>{history_a}</div>"
        chat_b.value = f"<div class='chat-box'>{history_b}</div>"

    # Reset the sender's inputs whether or not anything was sent.
    text_input.value = ""
    img_upload.value.clear()
    pdf_upload.value.clear()


# Send from A
def send_from_a(_):
    """Click handler for user A's Send button (translates into B's language)."""
    _relay_message("A", input_a, upload_a_img, upload_a_pdf, lang_b.value)


# Send from B
def send_from_b(_):
    """Click handler for user B's Send button (translates into A's language)."""
    _relay_message("B", input_b, upload_b_img, upload_b_pdf, lang_a.value)

# Wire each Send button to its handler.
send_a.on_click(send_from_a)
send_b.on_click(send_from_b)

# Panel A
# Stacked top to bottom: title, chat history, text input, then one row holding the
# image upload, PDF upload, language dropdown and Send button.
panel_a = widgets.VBox([
    widgets.HTML("<b>User A</b>"),
    chat_a,
    input_a,
    widgets.HBox([upload_a_img, upload_a_pdf, lang_a, send_a], layout=widgets.Layout(justify_content='space-between', width='95%'))
], layout=widgets.Layout(width='100%'))

# Panel B
# Same structure as panel A, built from user B's widgets.
panel_b = widgets.VBox([
    widgets.HTML("<b>User B</b>"),
    chat_b,
    input_b,
    widgets.HBox([upload_b_img, upload_b_pdf, lang_b, send_b], layout=widgets.Layout(justify_content='space-between', width='95%'))
], layout=widgets.Layout(width='100%'))

# Layout
# Two half-width columns (A | B) with the `.divider` element between them.
layout = widgets.HBox([
    widgets.VBox([panel_a], layout=widgets.Layout(width='50%')),
    widgets.HTML('<div class="divider"></div>'),
    widgets.VBox([panel_b], layout=widgets.Layout(width='50%'))
], layout=widgets.Layout(width='100%'))

display(layout)


HBox(children=(VBox(children=(VBox(children=(HTML(value='<b>User A</b>'), HTML(value=''), Textarea(value='', l…

All-language translation, now including Arabic: text, image, and PDF translation, with Arabic also available as a receiving language

In [21]:
!sudo apt-get install -y tesseract-ocr-ara


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
tesseract-ocr-ara is already the newest version (1:4.00~git30-7274cfa-1.1).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.


In [22]:
import ipywidgets as widgets
from IPython.display import display, HTML
from base64 import b64encode
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from langdetect import detect
from PIL import Image
import pytesseract
import io
import torch
from PyPDF2 import PdfReader
import re

# ======================
# Load NLLB model
# ======================
# `tokenizer` and `model` are module-level globals read by translate_message() in this cell.
# NOTE(review): this repeats the checkpoint load done by the earlier chat cells.
model_name = "facebook/nllb-200-distilled-600M"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# ======================
# Language code mappings
# ======================
# Dropdown display name -> language code handed to the tokenizer/model as the target.
lang_code_map = {
    "English": "eng_Latn", "Hindi": "hin_Deva", "Gujarati": "guj_Gujr",
    "Tamil": "tam_Taml", "Bengali": "ben_Beng", "Telugu": "tel_Telu",
    "Marathi": "mar_Deva", "Kannada": "kan_Knda", "Malayalam": "mal_Mlym",
    "Arabic": "arb_Arab"
}

# Code returned by langdetect's detect() -> the same model language codes (used as the source).
# translate_message() falls back to "eng_Latn" for any detected code missing from this table.
lang_detect_map = {
    "en": "eng_Latn", "hi": "hin_Deva", "gu": "guj_Gujr", "ta": "tam_Taml",
    "bn": "ben_Beng", "te": "tel_Telu", "mr": "mar_Deva", "kn": "kan_Knda",
    "ml": "mal_Mlym", "ar": "arb_Arab"
}

# ======================
# Utilities
# ======================
def _safe_detect(text: str) -> str:
    """Return the language code detected for ``text``, or "en" if detection fails."""
    try:
        code = detect(text)
    except Exception:
        code = "en"
    return code

def _chunk_text(s: str, max_chars: int = 1200):
    """Split ``s`` into whitespace-normalised chunks of at most ``max_chars`` characters.

    Chunks break at sentence ends where possible. Besides ``. ! ?`` the
    Devanagari danda / double danda and the Arabic question mark / full stop
    count as sentence ends, so Indic and Arabic text is actually split. A
    sentence longer than ``max_chars`` is broken at word boundaries, and a
    single over-long word is hard-sliced, so no chunk exceeds the limit.

    Args:
        s: Text to split.
        max_chars: Maximum chunk length in characters (values below 1 are treated as 1).

    Returns:
        A list of chunk strings; ``[s]`` (normalised) when it already fits.
    """
    max_chars = max(1, max_chars)
    s = re.sub(r'\s+', ' ', s).strip()
    if len(s) <= max_chars:
        return [s]

    # 1) Cut into pieces that each fit the limit on their own.
    pieces = []
    for sentence in re.split(r'(?<=[.!?\u0964\u0965\u061F\u06D4])\s+', s):
        if len(sentence) <= max_chars:
            pieces.append(sentence)
            continue
        for word in sentence.split(' '):
            pieces.extend(word[i:i + max_chars] for i in range(0, len(word), max_chars))

    # 2) Greedily pack pieces; `total` is the exact length of ' '.join(buf).
    chunks, buf, total = [], [], 0
    for piece in pieces:
        needed = len(piece) + (1 if buf else 0)
        if buf and total + needed > max_chars:
            chunks.append(' '.join(buf))
            buf, total = [piece], len(piece)
        else:
            buf.append(piece)
            total += needed
    if buf:
        chunks.append(' '.join(buf))
    return chunks

# ======================
# Translation function
# ======================
def translate_message(message, target_lang):
    """Translate ``message`` into ``target_lang``, chunk by chunk.

    The source language is auto-detected (English on failure); the text is cut
    by ``_chunk_text`` and each chunk is translated separately, then the
    results are joined with spaces.

    Returns:
        The translation, ``"[No text to translate]"`` for blank input, or
        ``"[Translation Error: ...]"`` if any step fails.
    """
    try:
        if not (message and message.strip()):
            return "[No text to translate]"

        detected = _safe_detect(message)
        source_code = lang_detect_map.get(detected, "eng_Latn")
        target_code = lang_code_map.get(target_lang, "eng_Latn")
        tokenizer.src_lang = source_code

        pieces = []
        for segment in _chunk_text(message, max_chars=1200):
            encoded = tokenizer(segment, return_tensors="pt", truncation=True, max_length=900)
            output_ids = model.generate(
                **encoded,
                forced_bos_token_id=tokenizer.convert_tokens_to_ids(target_code),
                max_length=512
            )
            decoded = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
            pieces.append(decoded[0])
        return " ".join(pieces).strip()
    except Exception as err:
        return f"[Translation Error: {err}]"

# ======================
# OCR + Translate from image
# ======================
def ocr_and_translate(image_bytes, target_lang):
    """OCR the image held in ``image_bytes`` (Arabic + English) and translate the text.

    Returns the translation, ``"[No text detected in image]"`` when OCR yields
    nothing but whitespace, or ``"[OCR Error: ...]"`` if opening/recognising fails.
    """
    try:
        picture = Image.open(io.BytesIO(image_bytes))
        # Arabic + English OCR
        raw = pytesseract.image_to_string(picture, lang="ara+eng")
        recognized = (raw or "").strip()
        if recognized:
            return translate_message(recognized, target_lang)
        return "[No text detected in image]"
    except Exception as err:
        return f"[OCR Error: {err}]"

# ======================
# PDF extraction with OCR fallback
# ======================
def extract_text_from_pdf(pdf_bytes, target_lang=None):
    """Extract text from a PDF given as bytes, falling back to OCR.

    First tries the selectable text layer via PyPDF2. If that yields nothing
    (or fails), every page is rasterised with PyMuPDF and run through
    Tesseract using the Arabic + English language packs.

    Args:
        pdf_bytes: Content of the uploaded PDF file.
        target_lang: Accepted for compatibility with existing callers; it is
            not used by the extraction and may be omitted.

    Returns:
        The extracted text (pages joined by newlines, stripped), or ``""`` when
        neither route produces text or the OCR route is unavailable. Never raises.
    """
    # 1) Selectable text via PyPDF2
    try:
        reader = PdfReader(io.BytesIO(pdf_bytes))
        text = "\n".join((page.extract_text() or "") for page in reader.pages).strip()
        if text:
            return text
    except Exception:
        # Deliberate best-effort: any PyPDF2 failure falls through to the OCR route.
        pass

    # 2) Fallback: rasterise each page and OCR it
    try:
        import fitz  # PyMuPDF (optional dependency, imported lazily)
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        try:
            ocr_text = []
            for pno in range(len(doc)):
                page = doc[pno]
                pix = page.get_pixmap(dpi=200)
                img = Image.open(io.BytesIO(pix.tobytes("png")))
                ocr_text.append(pytesseract.image_to_string(img, lang="ara+eng"))
        finally:
            # Release the document even when a page fails to render or OCR.
            doc.close()
        return "\n".join(ocr_text).strip()
    except Exception:
        return ""

# ======================
# Light UI styling
# ======================
# Emits a <style> block into the cell output. Each message bubble class is named
# user<sender>-<side>: "-right" is used for the sender's own pane, "-left" for the receiver's.
display(HTML('''
<style>
.chat-box {
    height: 250px;
    overflow-y: auto;
    padding: 10px;
    border: 1px solid #ddd;
    background: #fefefe;
}
.userA-right { text-align: right; background: #0078D7; color: white; padding: 8px 10px; margin: 5px; border-radius: 12px; float: right; clear: both; }
.userA-left  { text-align: left;  background: #e1f5fe; color: black; padding: 8px 10px; margin: 5px; border-radius: 12px; float: left; clear: both; }
.userB-right { text-align: right; background: #4CAF50; color: white; padding: 8px 10px; margin: 5px; border-radius: 12px; float: right; clear: both; }
.userB-left  { text-align: left;  background: #c8e6c9; color: black; padding: 8px 10px; margin: 5px; border-radius: 12px; float: left; clear: both; }
</style>
'''))

# ======================
# Chat UI setup
# ======================
# chat_a / chat_b render each user's pane; history_a / history_b accumulate the HTML of
# every message and are re-rendered into the widgets by the send handlers.
chat_a = widgets.HTML()
chat_b = widgets.HTML()
history_a = ""
history_b = ""

# One text box plus separate image and PDF upload widgets per user.
input_a = widgets.Textarea(placeholder='Type a message...', layout=widgets.Layout(width='95%', height='40px'))
input_b = widgets.Textarea(placeholder='Type a message...', layout=widgets.Layout(width='95%', height='40px'))

upload_a_img = widgets.FileUpload(accept='image/*', multiple=True, description='image')
upload_b_img = widgets.FileUpload(accept='image/*', multiple=True, description='image')
upload_a_pdf = widgets.FileUpload(accept='.pdf', multiple=True, description='pdf')
upload_b_pdf = widgets.FileUpload(accept='.pdf', multiple=True, description='pdf')

# lang_a is the language messages sent *to* user A are translated into (and lang_b for user B);
# user B defaults to Arabic.
langs = ["English", "Hindi", "Gujarati", "Tamil", "Bengali", "Telugu", "Marathi", "Kannada", "Malayalam", "Arabic"]
lang_a = widgets.Dropdown(options=langs, value='English', layout=widgets.Layout(width='120px'))
lang_b = widgets.Dropdown(options=langs, value='Arabic', layout=widgets.Layout(width='120px'))

send_a = widgets.Button(description="Send", layout=widgets.Layout(width='60px'))
send_b = widgets.Button(description="Send", layout=widgets.Layout(width='60px'))

def image_to_html(fileinfo):
    """Render an uploaded image as an inline, 100px-wide base64 ``<img>`` tag.

    Args:
        fileinfo: Upload record (dict). ``fileinfo['content']`` holds the raw
            bytes; the MIME type is taken from ``fileinfo['metadata']['type']``
            or ``fileinfo['type']`` when present.

    Returns:
        HTML for an ``<img>`` whose ``src`` is a data URI. Falls back to
        ``image/png`` when no image MIME type is available.
    """
    import html  # local import keeps this helper self-contained

    content = fileinfo['content']
    meta = fileinfo.get('metadata')
    if not isinstance(meta, dict):
        meta = {}
    mime = meta.get('type') or fileinfo.get('type') or 'image/png'
    # Only trust image/* types; anything else keeps the previous default.
    if not str(mime).startswith('image/'):
        mime = 'image/png'
    img_b64 = b64encode(content).decode('utf-8')
    return f'<img src="data:{html.escape(str(mime), quote=True)};base64,{img_b64}" width="100" />'

def pdf_to_html(fileinfo):
    """Return a bold HTML label carrying the uploaded PDF's file name.

    Args:
        fileinfo: Upload record; the name is read from
            ``fileinfo['metadata']['name']`` when a 'metadata' key exists.

    Returns:
        ``<div><b>NAME</b></div>`` with NAME HTML-escaped, or "Unknown PDF"
        when the record has no metadata.
    """
    import html  # local import keeps this helper self-contained

    pdf_name = fileinfo['metadata']['name'] if 'metadata' in fileinfo else "Unknown PDF"
    # The file name is user-controlled; escape it before it reaches widgets.HTML.
    return f'<div><b>{html.escape(str(pdf_name))}</b></div>'

# ======================
# Sending logic
# ======================
def _relay_message(sender, text_input, img_upload, pdf_upload, target_lang):
    """Post one message from ``sender`` ('A' or 'B') into both chat panes.

    The sender's pane shows the original text plus attachments; the other pane
    shows only translated text. Typed and translated text is HTML-escaped, and
    everything that was sent (text, every image, every PDF) is translated, so
    the receiver never gets an empty bubble for multi-attachment messages.

    Args:
        sender: "A" or "B"; selects the bubble classes and which history is the sender's own.
        text_input: Text widget holding the typed message.
        img_upload: Upload widget for images; its ``value`` is used as a dict.
        pdf_upload: Upload widget for PDFs; its ``value`` is used as a dict.
        target_lang: Display name of the receiver's language.
    """
    import html  # local import keeps this cell self-contained

    global history_a, history_b

    msg = text_input.value.strip()
    uploaded_img = list(img_upload.value.values())
    uploaded_pdf = list(pdf_upload.value.values())

    if msg or uploaded_img or uploaded_pdf:
        images_html = "".join(image_to_html(f) for f in uploaded_img)
        pdf_html = "".join(pdf_to_html(f) for f in uploaded_pdf)

        translations = []
        if msg:
            translations.append(translate_message(msg, target_lang))
        for f in uploaded_img:
            translations.append(ocr_and_translate(f['content'], target_lang))
        for f in uploaded_pdf:
            pdf_text = extract_text_from_pdf(f['content'], target_lang)
            translations.append(
                translate_message(pdf_text, target_lang) if pdf_text else "[No text found in PDF]"
            )
        translated_html = "<br>".join(html.escape(str(t)) for t in translations)

        own_entry = f"<div class='user{sender}-right'>User {sender}: {html.escape(msg)}{images_html}{pdf_html}</div>"
        peer_entry = f"<div class='user{sender}-left'>User {sender}: {translated_html}</div>"
        if sender == "A":
            history_a += own_entry
            history_b += peer_entry
        else:
            history_b += own_entry
            history_a += peer_entry

        chat_a.value = f"<div class='chat-box'>{history_a}</div>"
        chat_b.value = f"<div class='chat-box'>{history_b}</div>"

    # Reset the sender's inputs whether or not anything was sent.
    text_input.value = ""
    img_upload.value.clear()
    pdf_upload.value.clear()


def send_from_a(_):
    """Click handler for user A's Send button (translates into B's language)."""
    _relay_message("A", input_a, upload_a_img, upload_a_pdf, lang_b.value)


def send_from_b(_):
    """Click handler for user B's Send button (translates into A's language)."""
    _relay_message("B", input_b, upload_b_img, upload_b_pdf, lang_a.value)

send_a.on_click(send_from_a)
send_b.on_click(send_from_b)

# ======================
# Final Layout
# ======================
def _build_panel(title, chat_view, text_box, controls):
    """Stack one user's heading, chat history, text box and control row vertically."""
    control_row = widgets.HBox(
        controls,
        layout=widgets.Layout(justify_content='space-between', width='95%'),
    )
    return widgets.VBox(
        [widgets.HTML(title), chat_view, text_box, control_row],
        layout=widgets.Layout(width='100%'),
    )


panel_a = _build_panel("<b>User A</b>", chat_a, input_a, [upload_a_img, upload_a_pdf, lang_a, send_a])
panel_b = _build_panel("<b>User B</b>", chat_b, input_b, [upload_b_img, upload_b_pdf, lang_b, send_b])

# Two equal-width columns, one per user.
layout = widgets.HBox(
    [widgets.VBox([panel], layout=widgets.Layout(width='50%')) for panel in (panel_a, panel_b)],
    layout=widgets.Layout(width='100%'),
)

display(layout)


HBox(children=(VBox(children=(VBox(children=(HTML(value='<b>User A</b>'), HTML(value=''), Textarea(value='', l…

OCR routing: PaddleOCR for Arabic (falling back to EasyOCR), and TrOCR for English (falling back to Tesseract).

In [23]:
!pip install "paddleocr>=2.7"
!pip install paddlepaddle




In [24]:
!pip install easyocr




In [31]:
import ipywidgets as widgets
from IPython.display import display, HTML
from base64 import b64encode
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from langdetect import detect
from PIL import Image
import pytesseract
import io
import torch
from PyPDF2 import PdfReader
import re
import numpy as np

# OCR Engines
from paddleocr import PaddleOCR
import easyocr
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

# ======================
# Load NLLB model
# ======================
# Translation checkpoint; the tokenizer/model pair below is shared by translate_message.
model_name = "facebook/nllb-200-distilled-600M"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# ======================
# Load OCR models once
# ======================
# Created at module level so ocr_and_translate reuses the same instances on every call.
paddle_ocr = PaddleOCR(lang='ar')       # Arabic OCR
easy_ocr = easyocr.Reader(['ar'])       # Fallback Arabic
# TrOCR processor/model pair used by ocr_and_translate when no Arabic script is detected.
trocr_processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
trocr_model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed")

# ======================
# Language code mappings
# ======================
# Dropdown language name -> NLLB language code; translate_message looks up the
# translation target here (unknown names fall back to "eng_Latn" there).
lang_code_map = {
    "English": "eng_Latn", "Hindi": "hin_Deva", "Gujarati": "guj_Gujr",
    "Tamil": "tam_Taml", "Bengali": "ben_Beng", "Telugu": "tel_Telu",
    "Marathi": "mar_Deva", "Kannada": "kan_Knda", "Malayalam": "mal_Mlym",
    "Arabic": "arb_Arab"
}

# Two-letter code returned by _safe_detect -> NLLB language code; translate_message
# looks up the translation source here (unknown codes fall back to "eng_Latn" there).
lang_detect_map = {
    "en": "eng_Latn", "hi": "hin_Deva", "gu": "guj_Gujr", "ta": "tam_Taml",
    "bn": "ben_Beng", "te": "tel_Telu", "mr": "mar_Deva", "kn": "kan_Knda",
    "ml": "mal_Mlym", "ar": "arb_Arab"
}

# ======================
# Utilities
# ======================
def _safe_detect(text: str) -> str:
    try:
        return detect(text)
    except Exception:
        return "en"

def _chunk_text(s: str, max_chars: int = 1200):
    s = re.sub(r'\s+', ' ', s).strip()
    if len(s) <= max_chars:
        return [s]
    chunks, buf = [], []
    total = 0
    for part in re.split(r'(?<=[.!?])\s+', s):
        if total + len(part) + 1 > max_chars and buf:
            chunks.append(' '.join(buf))
            buf, total = [part], len(part)
        else:
            buf.append(part)
            total += len(part) + 1
    if buf:
        chunks.append(' '.join(buf))
    return chunks

def detect_script(text: str) -> str:
    """Classify text as Arabic script ("ar") or anything else ("en").

    One character in the U+0600-U+06FF range is enough to return "ar".
    """
    has_arabic = any('\u0600' <= ch <= '\u06FF' for ch in text)
    return "ar" if has_arabic else "en"

# ======================
# Translation function
# ======================
def translate_message(message, target_lang):
    """Translate ``message`` into ``target_lang`` using the shared tokenizer/model.

    The source language is guessed with ``_safe_detect``. Detected codes missing
    from ``lang_detect_map`` and target names missing from ``lang_code_map`` both
    fall back to "eng_Latn". The text is translated chunk by chunk and the
    results are joined with spaces.

    Args:
        message: Text to translate.
        target_lang: Language name as listed in the dropdown (a ``lang_code_map`` key).

    Returns:
        The translated text, "[No text to translate]" for empty or blank input,
        or "[Translation Error: ...]" if anything raises.
    """
    try:
        if not (message and message.strip()):
            return "[No text to translate]"

        detected_code = _safe_detect(message)
        source_code = lang_detect_map.get(detected_code, "eng_Latn")
        target_code = lang_code_map.get(target_lang, "eng_Latn")
        tokenizer.src_lang = source_code

        pieces = []
        for chunk in _chunk_text(message, max_chars=1200):
            encoded = tokenizer(chunk, return_tensors="pt", truncation=True, max_length=900)
            output_ids = model.generate(
                **encoded,
                forced_bos_token_id=tokenizer.convert_tokens_to_ids(target_code),
                max_length=512,
            )
            decoded = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
            pieces.append(decoded[0])
        return " ".join(pieces).strip()
    except Exception as err:
        return f"[Translation Error: {str(err)}]"

# ======================
# OCR + Translate from image
# ======================
def _paddle_texts(result):
    """Collect the recognized strings from a PaddleOCR result in either known layout."""
    texts = []
    for page in result or []:
        if not page:
            continue  # a page with no detections can come back as None/empty
        if isinstance(page, dict):
            # NOTE(review): newer PaddleOCR releases appear to return dict-like pages
            # with a "rec_texts" list; confirm against the installed version.
            texts.extend(page.get("rec_texts") or [])
        else:
            # Legacy layout: each line is [box, (text, score)].
            texts.extend(line[1][0] for line in page)
    return texts


def ocr_and_translate(image_bytes, target_lang):
    """OCR an image and translate the recognized text into ``target_lang``.

    A quick Tesseract pass ("ara+eng") decides the route via ``detect_script``:
    Arabic script goes to PaddleOCR, then EasyOCR, then the quick Tesseract text;
    anything else goes to TrOCR, then Tesseract ("eng").

    Args:
        image_bytes: Encoded image file contents.
        target_lang: Language name passed through to ``translate_message``.

    Returns:
        The translated text, "[No text detected in image]" when every engine
        comes up empty, or "[OCR Error: ...]" if anything outside the
        per-engine fallbacks raises.
    """
    try:
        img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
        np_img = np.array(img)

        # Quick check with tesseract for script
        quick_text = pytesseract.image_to_string(img, lang="ara+eng").strip()
        script = detect_script(quick_text) if quick_text else "en"

        extracted_text = ""
        if script == "ar":
            try:
                try:
                    result = paddle_ocr.ocr(np_img, cls=True)
                except TypeError:
                    # NOTE(review): newer PaddleOCR reportedly rejects the legacy
                    # `cls` keyword; retry through predict() - confirm.
                    result = paddle_ocr.predict(np_img)
                extracted_text = " ".join(_paddle_texts(result))
            except Exception:
                # Best effort: any PaddleOCR failure falls through to EasyOCR.
                extracted_text = ""
            if not extracted_text.strip():
                try:
                    extracted_text = " ".join(easy_ocr.readtext(np_img, detail=0))
                except Exception:
                    extracted_text = ""
            if not extracted_text.strip():
                # Both Arabic engines found nothing, but the quick Tesseract pass
                # did see Arabic text (that is why this branch ran), so use it.
                extracted_text = quick_text
        else:
            # NOTE(review): TrOCR receives the whole image here; confirm it copes
            # with multi-line pages, since the Tesseract fallback only runs when
            # TrOCR returns nothing.
            try:
                pixel_values = trocr_processor(images=img, return_tensors="pt").pixel_values
                generated_ids = trocr_model.generate(pixel_values)
                extracted_text = trocr_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
            except Exception:
                extracted_text = ""
            if not extracted_text.strip():
                extracted_text = pytesseract.image_to_string(img, lang="eng")

        if not extracted_text.strip():
            return "[No text detected in image]"

        return translate_message(extracted_text.strip(), target_lang)
    except Exception as e:
        return f"[OCR Error: {str(e)}]"

## ======================
# PDF extraction with OCR fallback
# ======================
import io
from PyPDF2 import PdfReader
import fitz  # PyMuPDF

def extract_text_from_pdf(pdf_bytes, target_lang, min_text_length=30):
    """
    Extract a PDF's text and return it translated into ``target_lang``.

    1. Try direct (embedded-text) extraction with PdfReader.
    2. If that yields at least ``min_text_length`` characters, translate and return it.
    3. Otherwise render every page at 300 DPI and run ``ocr_and_translate`` on it.
       If OCR finds nothing, any short embedded text from step 1 is translated
       instead of being discarded.

    Args:
        pdf_bytes: Raw PDF file contents.
        target_lang: Language name passed through to the translation helpers.
        min_text_length: Minimum length for directly extracted text to be trusted.

    Returns:
        The translated text, "[No text detected]" when nothing was found, or
        "[PDF OCR Error: ...]" if the OCR fallback raises.
    """
    extracted_text = ""

    # ---- Step 1: Direct text extraction ----
    try:
        reader = PdfReader(io.BytesIO(pdf_bytes))
        page_texts = (page.extract_text() for page in reader.pages)
        extracted_text = "\n".join(t.strip() for t in page_texts if t).strip()
    except Exception as e:
        print(f"[Warning] Direct PDF text extraction failed: {e}")

    # ---- Step 2: Check quality ----
    if extracted_text and len(extracted_text) >= min_text_length:
        return translate_message(extracted_text, target_lang)

    # ---- Step 3: Fallback -> OCR ----
    try:
        ocr_texts = []
        # The context manager closes the document even if rendering or OCR raises.
        with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
            for page in doc:
                # Render page as image (high resolution)
                pix = page.get_pixmap(dpi=300)

                # OCR + translation
                ocr_result = ocr_and_translate(pix.tobytes("png"), target_lang)
                # Skip the blank-page sentinel so it is not reported as page text.
                if ocr_result and ocr_result.strip() != "[No text detected in image]":
                    ocr_texts.append(ocr_result.strip())

        if ocr_texts:
            return "\n".join(ocr_texts).strip()
        if extracted_text:
            # OCR found nothing; a short embedded text layer is better than no text.
            return translate_message(extracted_text, target_lang)
        return "[No text detected]"

    except Exception as e:
        return f"[PDF OCR Error: {str(e)}]"


# ======================
# Light UI styling
# ======================
# Inject the stylesheet for the chat widgets: `.chat-box` is the scrollable history
# container, and the `.userA-*` / `.userB-*` classes are the message bubbles emitted by
# send_from_a / send_from_b (`-right` in the sender's own panel, `-left` in the receiver's).
display(HTML('''
<style>
.chat-box {
    height: 250px;
    overflow-y: auto;
    padding: 10px;
    border: 1px solid #ddd;
    background: #fefefe;
}
.userA-right { text-align: right; background: #0078D7; color: white; padding: 8px 10px; margin: 5px; border-radius: 12px; float: right; clear: both; }
.userA-left  { text-align: left;  background: #e1f5fe; color: black; padding: 8px 10px; margin: 5px; border-radius: 12px; float: left; clear: both; }
.userB-right { text-align: right; background: #4CAF50; color: white; padding: 8px 10px; margin: 5px; border-radius: 12px; float: right; clear: both; }
.userB-left  { text-align: left;  background: #c8e6c9; color: black; padding: 8px 10px; margin: 5px; border-radius: 12px; float: left; clear: both; }
</style>
'''))

# ======================
# Chat UI setup
# ======================
# Per-user chat display widgets and the accumulated HTML history rendered in each
# (the send handlers append to the history strings and re-assign the widget values).
chat_a = widgets.HTML()
chat_b = widgets.HTML()
history_a = ""
history_b = ""

# Message entry boxes, one per user.
input_a = widgets.Textarea(placeholder='Type a message...', layout=widgets.Layout(width='95%', height='40px'))
input_b = widgets.Textarea(placeholder='Type a message...', layout=widgets.Layout(width='95%', height='40px'))

# Separate image and PDF uploaders per user. Multiple files may be selected, but the
# send handlers only OCR/translate when exactly one file is attached and no text is typed.
upload_a_img = widgets.FileUpload(accept='image/*', multiple=True, description='image')
upload_b_img = widgets.FileUpload(accept='image/*', multiple=True, description='image')
upload_a_pdf = widgets.FileUpload(accept='.pdf', multiple=True, description='pdf')
upload_b_pdf = widgets.FileUpload(accept='.pdf', multiple=True, description='pdf')

# Languages selectable by each user. send_from_a translates into lang_b.value and
# send_from_b into lang_a.value, so each dropdown is the language its user reads in.
langs = ["English", "Hindi", "Gujarati", "Tamil", "Bengali", "Telugu", "Marathi", "Kannada", "Malayalam", "Arabic"]
lang_a = widgets.Dropdown(options=langs, value='English', layout=widgets.Layout(width='120px'))
lang_b = widgets.Dropdown(options=langs, value='Arabic', layout=widgets.Layout(width='120px'))

# One compact "Send" button per user panel.
send_a = widgets.Button(description="Send", layout=widgets.Layout(width='60px'))
send_b = widgets.Button(description="Send", layout=widgets.Layout(width='60px'))

def image_to_html(fileinfo):
    """Render one uploaded image as an inline base64 ``<img>`` thumbnail.

    Args:
        fileinfo: One entry of ``FileUpload.value``: a dict with the raw bytes
            under ``'content'`` and, optionally, upload metadata under
            ``'metadata'``.

    Returns:
        An ``<img>`` tag (100 px wide) embedding the image as a data URI.
    """
    content = fileinfo['content']
    img_b64 = b64encode(content).decode('utf-8')

    # Label the data URI with the MIME type reported at upload time so JPEG/GIF/WebP
    # files are not declared as PNG. Anything missing, non-image, or containing
    # characters that could break out of the HTML attribute falls back to PNG.
    mime = str((fileinfo.get('metadata') or {}).get('type') or '')
    subtype = mime[len('image/'):]
    is_safe = (
        mime.startswith('image/')
        and bool(subtype)
        and all(ch.isalnum() or ch in '+.-' for ch in subtype)
    )
    if not is_safe:
        mime = 'image/png'

    return f'<img src="data:{mime};base64,{img_b64}" width="100" />'

def pdf_to_html(fileinfo):
    """Render an uploaded PDF as a bold file-name label.

    Args:
        fileinfo: One entry of ``FileUpload.value``; the name is read from
            ``fileinfo['metadata']['name']`` when a ``'metadata'`` key exists.

    Returns:
        ``<div><b>NAME</b></div>`` with NAME HTML-escaped, or "Unknown PDF" as
        the name when no metadata is present.
    """
    from html import escape  # local import keeps the function self-contained

    pdf_name = fileinfo['metadata']['name'] if 'metadata' in fileinfo else "Unknown PDF"
    # The file name is user-controlled; escape it so markup in it is shown literally.
    return f'<div><b>{escape(str(pdf_name))}</b></div>'

# ======================
# Sending logic
# ======================
def _compose_chat_entry(sender, text_box, img_upload, pdf_upload, target_lang):
    """Build the sender-side and receiver-side chat bubbles for one Send click.

    Args:
        sender: "A" or "B"; used in the bubble label and CSS class name.
        text_box: The sender's Textarea widget.
        img_upload: The sender's image FileUpload widget.
        pdf_upload: The sender's PDF FileUpload widget.
        target_lang: Language name the receiver selected in their dropdown.

    Returns:
        ``(sent_html, received_html)``, or ``None`` when there was nothing to
        send. The text box and both uploaders are reset in either case.
    """
    from html import escape

    msg = text_box.value.strip()
    uploaded_img = list(img_upload.value.values())
    uploaded_pdf = list(pdf_upload.value.values())

    images_html = "".join(image_to_html(f) for f in uploaded_img)
    pdf_html = "".join(pdf_to_html(f) for f in uploaded_pdf)

    # Typed text takes priority; otherwise exactly one image is OCR'd, otherwise
    # exactly one PDF is read. Any other combination yields an empty translation.
    translated = ""
    if msg:
        translated = translate_message(msg, target_lang)
    elif len(uploaded_img) == 1:
        translated = ocr_and_translate(uploaded_img[0]['content'], target_lang)
    elif len(uploaded_pdf) == 1:
        translated = extract_text_from_pdf(uploaded_pdf[0]['content'], target_lang)

    entry = None
    if msg or uploaded_img or uploaded_pdf:
        # Typed text and OCR/translation output are untrusted: escape them so any
        # markup they contain is displayed literally instead of being rendered.
        sent_html = f"<div class='user{sender}-right'>User {sender}: {escape(msg)}{images_html}{pdf_html}</div>"
        received_html = f"<div class='user{sender}-left'>User {sender}: {escape(str(translated))}</div>"
        entry = (sent_html, received_html)

    text_box.value = ""
    img_upload.value.clear()
    pdf_upload.value.clear()
    return entry


def send_from_a(_):
    """Click handler for User A's Send button; the translation targets User B's language."""
    global history_a, history_b
    entry = _compose_chat_entry("A", input_a, upload_a_img, upload_a_pdf, lang_b.value)
    if entry is None:
        return
    sent_html, received_html = entry
    history_a += sent_html
    history_b += received_html
    chat_a.value = f"<div class='chat-box'>{history_a}</div>"
    chat_b.value = f"<div class='chat-box'>{history_b}</div>"


def send_from_b(_):
    """Click handler for User B's Send button; the translation targets User A's language."""
    global history_a, history_b
    entry = _compose_chat_entry("B", input_b, upload_b_img, upload_b_pdf, lang_a.value)
    if entry is None:
        return
    sent_html, received_html = entry
    history_b += sent_html
    history_a += received_html
    chat_b.value = f"<div class='chat-box'>{history_b}</div>"
    chat_a.value = f"<div class='chat-box'>{history_a}</div>"


send_a.on_click(send_from_a)
send_b.on_click(send_from_b)

# ======================
# Final Layout
# ======================
def _build_panel(title, chat_view, text_box, controls):
    """Stack one user's heading, chat history, text box and control row vertically."""
    control_row = widgets.HBox(
        controls,
        layout=widgets.Layout(justify_content='space-between', width='95%'),
    )
    return widgets.VBox(
        [widgets.HTML(title), chat_view, text_box, control_row],
        layout=widgets.Layout(width='100%'),
    )


panel_a = _build_panel("<b>User A</b>", chat_a, input_a, [upload_a_img, upload_a_pdf, lang_a, send_a])
panel_b = _build_panel("<b>User B</b>", chat_b, input_b, [upload_b_img, upload_b_pdf, lang_b, send_b])

# Two equal-width columns, one per user.
layout = widgets.HBox(
    [widgets.VBox([panel], layout=widgets.Layout(width='50%')) for panel in (panel_a, panel_b)],
    layout=widgets.Layout(width='100%'),
)

display(layout)


[32mCreating model: ('PP-LCNet_x1_0_doc_ori', None)[0m
[32mUsing official model (PP-LCNet_x1_0_doc_ori), the model files will be automatically downloaded and saved in /root/.paddlex/official_models.[0m


Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

[32mCreating model: ('UVDoc', None)[0m
[32mUsing official model (UVDoc), the model files will be automatically downloaded and saved in /root/.paddlex/official_models.[0m


Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

[32mCreating model: ('PP-LCNet_x1_0_textline_ori', None)[0m
[32mUsing official model (PP-LCNet_x1_0_textline_ori), the model files will be automatically downloaded and saved in /root/.paddlex/official_models.[0m


Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

[32mCreating model: ('PP-OCRv3_mobile_det', None)[0m
[32mUsing official model (PP-OCRv3_mobile_det), the model files will be automatically downloaded and saved in /root/.paddlex/official_models.[0m


Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

[32mCreating model: ('arabic_PP-OCRv3_mobile_rec', None)[0m
[32mUsing official model (arabic_PP-OCRv3_mobile_rec), the model files will be automatically downloaded and saved in /root/.paddlex/official_models.[0m


Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

Some weights of VisionEncoderDecoderModel were not initialized from the model checkpoint at microsoft/trocr-base-printed and are newly initialized: ['encoder.pooler.dense.bias', 'encoder.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(VBox(children=(VBox(children=(HTML(value='<b>User A</b>'), HTML(value=''), Textarea(value='', l…

  result = paddle_ocr.ocr(np_img, cls=True)


The code below works for text, image, and PDF messages; every PDF page is rendered and OCR'd.

In [33]:
import ipywidgets as widgets
from IPython.display import display, HTML
from base64 import b64encode
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from langdetect import detect
from PIL import Image
import pytesseract
import io
import torch
from PyPDF2 import PdfReader
import re
import numpy as np

# OCR Engines
from paddleocr import PaddleOCR
import easyocr
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

# ======================
# Load NLLB model
# ======================
# Translation checkpoint; the tokenizer/model pair below is shared by translate_message.
model_name = "facebook/nllb-200-distilled-600M"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# ======================
# Load OCR models once
# ======================
# Created at module level so ocr_and_translate reuses the same instances on every call.
paddle_ocr = PaddleOCR(lang='ar')       # Arabic OCR
easy_ocr = easyocr.Reader(['ar'])       # Fallback Arabic
# TrOCR processor/model pair used by ocr_and_translate when no Arabic script is detected.
trocr_processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
trocr_model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed")

# ======================
# Language code mappings
# ======================
# Dropdown language name -> NLLB language code; translate_message looks up the
# translation target here (unknown names fall back to "eng_Latn" there).
lang_code_map = {
    "English": "eng_Latn", "Hindi": "hin_Deva", "Gujarati": "guj_Gujr",
    "Tamil": "tam_Taml", "Bengali": "ben_Beng", "Telugu": "tel_Telu",
    "Marathi": "mar_Deva", "Kannada": "kan_Knda", "Malayalam": "mal_Mlym",
    "Arabic": "arb_Arab"
}

# Two-letter code returned by _safe_detect -> NLLB language code; translate_message
# looks up the translation source here (unknown codes fall back to "eng_Latn" there).
lang_detect_map = {
    "en": "eng_Latn", "hi": "hin_Deva", "gu": "guj_Gujr", "ta": "tam_Taml",
    "bn": "ben_Beng", "te": "tel_Telu", "mr": "mar_Deva", "kn": "kan_Knda",
    "ml": "mal_Mlym", "ar": "arb_Arab"
}

# ======================
# Utilities
# ======================
def _safe_detect(text: str) -> str:
    try:
        return detect(text)
    except Exception:
        return "en"

def _chunk_text(s: str, max_chars: int = 1200):
    s = re.sub(r'\s+', ' ', s).strip()
    if len(s) <= max_chars:
        return [s]
    chunks, buf = [], []
    total = 0
    for part in re.split(r'(?<=[.!?])\s+', s):
        if total + len(part) + 1 > max_chars and buf:
            chunks.append(' '.join(buf))
            buf, total = [part], len(part)
        else:
            buf.append(part)
            total += len(part) + 1
    if buf:
        chunks.append(' '.join(buf))
    return chunks

def detect_script(text: str) -> str:
    """Classify text as Arabic script ("ar") or anything else ("en").

    One character in the U+0600-U+06FF range is enough to return "ar".
    """
    has_arabic = any('\u0600' <= ch <= '\u06FF' for ch in text)
    return "ar" if has_arabic else "en"

# ======================
# Translation function
# ======================
def translate_message(message, target_lang):
    """Translate ``message`` into ``target_lang`` using the shared tokenizer/model.

    The source language is guessed with ``_safe_detect``. Detected codes missing
    from ``lang_detect_map`` and target names missing from ``lang_code_map`` both
    fall back to "eng_Latn". The text is translated chunk by chunk and the
    results are joined with spaces.

    Args:
        message: Text to translate.
        target_lang: Language name as listed in the dropdown (a ``lang_code_map`` key).

    Returns:
        The translated text, "[No text to translate]" for empty or blank input,
        or "[Translation Error: ...]" if anything raises.
    """
    try:
        if not (message and message.strip()):
            return "[No text to translate]"

        detected_code = _safe_detect(message)
        source_code = lang_detect_map.get(detected_code, "eng_Latn")
        target_code = lang_code_map.get(target_lang, "eng_Latn")
        tokenizer.src_lang = source_code

        pieces = []
        for chunk in _chunk_text(message, max_chars=1200):
            encoded = tokenizer(chunk, return_tensors="pt", truncation=True, max_length=900)
            output_ids = model.generate(
                **encoded,
                forced_bos_token_id=tokenizer.convert_tokens_to_ids(target_code),
                max_length=512,
            )
            decoded = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
            pieces.append(decoded[0])
        return " ".join(pieces).strip()
    except Exception as err:
        return f"[Translation Error: {str(err)}]"

# ======================
# OCR + Translate from image
# ======================
def _paddle_texts(result):
    """Collect the recognized strings from a PaddleOCR result in either known layout."""
    texts = []
    for page in result or []:
        if not page:
            continue  # a page with no detections can come back as None/empty
        if isinstance(page, dict):
            # NOTE(review): newer PaddleOCR releases appear to return dict-like pages
            # with a "rec_texts" list; confirm against the installed version.
            texts.extend(page.get("rec_texts") or [])
        else:
            # Legacy layout: each line is [box, (text, score)].
            texts.extend(line[1][0] for line in page)
    return texts


def ocr_and_translate(image_bytes, target_lang):
    """OCR an image and translate the recognized text into ``target_lang``.

    A quick Tesseract pass ("ara+eng") decides the route via ``detect_script``:
    Arabic script goes to PaddleOCR, then EasyOCR, then the quick Tesseract text;
    anything else goes to TrOCR, then Tesseract ("eng").

    Args:
        image_bytes: Encoded image file contents.
        target_lang: Language name passed through to ``translate_message``.

    Returns:
        The translated text, "[No text detected in image]" when every engine
        comes up empty, or "[OCR Error: ...]" if anything outside the
        per-engine fallbacks raises.
    """
    try:
        img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
        np_img = np.array(img)

        # Quick check with tesseract for script
        quick_text = pytesseract.image_to_string(img, lang="ara+eng").strip()
        script = detect_script(quick_text) if quick_text else "en"

        extracted_text = ""
        if script == "ar":
            try:
                try:
                    result = paddle_ocr.ocr(np_img, cls=True)
                except TypeError:
                    # NOTE(review): newer PaddleOCR reportedly rejects the legacy
                    # `cls` keyword; retry through predict() - confirm.
                    result = paddle_ocr.predict(np_img)
                extracted_text = " ".join(_paddle_texts(result))
            except Exception:
                # Best effort: any PaddleOCR failure falls through to EasyOCR.
                extracted_text = ""
            if not extracted_text.strip():
                try:
                    extracted_text = " ".join(easy_ocr.readtext(np_img, detail=0))
                except Exception:
                    extracted_text = ""
            if not extracted_text.strip():
                # Both Arabic engines found nothing, but the quick Tesseract pass
                # did see Arabic text (that is why this branch ran), so use it.
                extracted_text = quick_text
        else:
            # NOTE(review): TrOCR receives the whole image here; confirm it copes
            # with multi-line pages, since the Tesseract fallback only runs when
            # TrOCR returns nothing.
            try:
                pixel_values = trocr_processor(images=img, return_tensors="pt").pixel_values
                generated_ids = trocr_model.generate(pixel_values)
                extracted_text = trocr_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
            except Exception:
                extracted_text = ""
            if not extracted_text.strip():
                extracted_text = pytesseract.image_to_string(img, lang="eng")

        if not extracted_text.strip():
            return "[No text detected in image]"

        return translate_message(extracted_text.strip(), target_lang)
    except Exception as e:
        return f"[OCR Error: {str(e)}]"

## ======================
# PDF extraction with OCR fallback
# ======================
import io
import fitz  # PyMuPDF
from PIL import Image
import pytesseract

def extract_text_from_pdf(pdf_bytes, target_lang):
    """
    OCR every page of a PDF and return the text translated into ``target_lang``.

    Each page is rendered with a 4x zoom matrix, read with Tesseract ("eng+ara"),
    and translated page by page with ``translate_message``. Any embedded text
    layer is ignored: OCR is always used.

    Args:
        pdf_bytes: Raw PDF file contents.
        target_lang: Language name passed through to ``translate_message``.

    Returns:
        The per-page translations joined by newlines, "[No text detected]" when
        no page produced text, or "[PDF OCR Error: ...]" if anything raises.
    """
    try:
        ocr_texts = []
        # 4x zoom over the PDF base resolution of 72 DPI, i.e. about 288 DPI (not 400).
        mat = fitz.Matrix(4, 4)

        # The context manager closes the document even if rendering or OCR raises.
        with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
            for page in doc:
                pix = page.get_pixmap(matrix=mat, alpha=False)

                # Convert to PIL Image
                img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)

                # OCR with explicit settings
                ocr_result = pytesseract.image_to_string(
                    img,
                    lang="eng+ara",  # change depending on your needs
                    config="--psm 6 --oem 3"  # treat image as block of text
                )

                if ocr_result.strip():
                    # Translate each page separately with the existing translate_message
                    ocr_texts.append(translate_message(ocr_result.strip(), target_lang))

        return "\n".join(ocr_texts).strip() if ocr_texts else "[No text detected]"

    except Exception as e:
        return f"[PDF OCR Error: {str(e)}]"


# ======================
# Light UI styling
# ======================
# Inject the stylesheet for the chat widgets: `.chat-box` is the scrollable history
# container, and the `.userA-*` / `.userB-*` classes are the message bubbles emitted by
# send_from_a / send_from_b (`-right` in the sender's own panel, `-left` in the receiver's).
display(HTML('''
<style>
.chat-box {
    height: 250px;
    overflow-y: auto;
    padding: 10px;
    border: 1px solid #ddd;
    background: #fefefe;
}
.userA-right { text-align: right; background: #0078D7; color: white; padding: 8px 10px; margin: 5px; border-radius: 12px; float: right; clear: both; }
.userA-left  { text-align: left;  background: #e1f5fe; color: black; padding: 8px 10px; margin: 5px; border-radius: 12px; float: left; clear: both; }
.userB-right { text-align: right; background: #4CAF50; color: white; padding: 8px 10px; margin: 5px; border-radius: 12px; float: right; clear: both; }
.userB-left  { text-align: left;  background: #c8e6c9; color: black; padding: 8px 10px; margin: 5px; border-radius: 12px; float: left; clear: both; }
</style>
'''))

# ======================
# Chat UI setup
# ======================
# Per-user chat display widgets and the accumulated HTML history rendered in each
# (the send handlers append to the history strings and re-assign the widget values).
chat_a = widgets.HTML()
chat_b = widgets.HTML()
history_a = ""
history_b = ""

# Message entry boxes, one per user.
input_a = widgets.Textarea(placeholder='Type a message...', layout=widgets.Layout(width='95%', height='40px'))
input_b = widgets.Textarea(placeholder='Type a message...', layout=widgets.Layout(width='95%', height='40px'))

# Separate image and PDF uploaders per user. Multiple files may be selected, but the
# send handlers only OCR/translate when exactly one file is attached and no text is typed.
upload_a_img = widgets.FileUpload(accept='image/*', multiple=True, description='image')
upload_b_img = widgets.FileUpload(accept='image/*', multiple=True, description='image')
upload_a_pdf = widgets.FileUpload(accept='.pdf', multiple=True, description='pdf')
upload_b_pdf = widgets.FileUpload(accept='.pdf', multiple=True, description='pdf')

# Languages selectable by each user. send_from_a translates into lang_b.value and
# send_from_b into lang_a.value, so each dropdown is the language its user reads in.
langs = ["English", "Hindi", "Gujarati", "Tamil", "Bengali", "Telugu", "Marathi", "Kannada", "Malayalam", "Arabic"]
lang_a = widgets.Dropdown(options=langs, value='English', layout=widgets.Layout(width='120px'))
lang_b = widgets.Dropdown(options=langs, value='Arabic', layout=widgets.Layout(width='120px'))

# One compact "Send" button per user panel.
send_a = widgets.Button(description="Send", layout=widgets.Layout(width='60px'))
send_b = widgets.Button(description="Send", layout=widgets.Layout(width='60px'))

def image_to_html(fileinfo):
    """Render one uploaded image as an inline base64 ``<img>`` thumbnail.

    Args:
        fileinfo: One entry of ``FileUpload.value``: a dict with the raw bytes
            under ``'content'`` and, optionally, upload metadata under
            ``'metadata'``.

    Returns:
        An ``<img>`` tag (100 px wide) embedding the image as a data URI.
    """
    content = fileinfo['content']
    img_b64 = b64encode(content).decode('utf-8')

    # Label the data URI with the MIME type reported at upload time so JPEG/GIF/WebP
    # files are not declared as PNG. Anything missing, non-image, or containing
    # characters that could break out of the HTML attribute falls back to PNG.
    mime = str((fileinfo.get('metadata') or {}).get('type') or '')
    subtype = mime[len('image/'):]
    is_safe = (
        mime.startswith('image/')
        and bool(subtype)
        and all(ch.isalnum() or ch in '+.-' for ch in subtype)
    )
    if not is_safe:
        mime = 'image/png'

    return f'<img src="data:{mime};base64,{img_b64}" width="100" />'

def pdf_to_html(fileinfo):
    """Render an uploaded PDF as a bold file-name label.

    Args:
        fileinfo: One entry of ``FileUpload.value``; the name is read from
            ``fileinfo['metadata']['name']`` when a ``'metadata'`` key exists.

    Returns:
        ``<div><b>NAME</b></div>`` with NAME HTML-escaped, or "Unknown PDF" as
        the name when no metadata is present.
    """
    from html import escape  # local import keeps the function self-contained

    pdf_name = fileinfo['metadata']['name'] if 'metadata' in fileinfo else "Unknown PDF"
    # The file name is user-controlled; escape it so markup in it is shown literally.
    return f'<div><b>{escape(str(pdf_name))}</b></div>'

# ======================
# Sending logic
# ======================
def _compose_chat_entry(sender, text_box, img_upload, pdf_upload, target_lang):
    """Build the sender-side and receiver-side chat bubbles for one Send click.

    Args:
        sender: "A" or "B"; used in the bubble label and CSS class name.
        text_box: The sender's Textarea widget.
        img_upload: The sender's image FileUpload widget.
        pdf_upload: The sender's PDF FileUpload widget.
        target_lang: Language name the receiver selected in their dropdown.

    Returns:
        ``(sent_html, received_html)``, or ``None`` when there was nothing to
        send. The text box and both uploaders are reset in either case.
    """
    from html import escape

    msg = text_box.value.strip()
    uploaded_img = list(img_upload.value.values())
    uploaded_pdf = list(pdf_upload.value.values())

    images_html = "".join(image_to_html(f) for f in uploaded_img)
    pdf_html = "".join(pdf_to_html(f) for f in uploaded_pdf)

    # Typed text takes priority; otherwise exactly one image is OCR'd, otherwise
    # exactly one PDF is read. Any other combination yields an empty translation.
    translated = ""
    if msg:
        translated = translate_message(msg, target_lang)
    elif len(uploaded_img) == 1:
        translated = ocr_and_translate(uploaded_img[0]['content'], target_lang)
    elif len(uploaded_pdf) == 1:
        translated = extract_text_from_pdf(uploaded_pdf[0]['content'], target_lang)

    entry = None
    if msg or uploaded_img or uploaded_pdf:
        # Typed text and OCR/translation output are untrusted: escape them so any
        # markup they contain is displayed literally instead of being rendered.
        sent_html = f"<div class='user{sender}-right'>User {sender}: {escape(msg)}{images_html}{pdf_html}</div>"
        received_html = f"<div class='user{sender}-left'>User {sender}: {escape(str(translated))}</div>"
        entry = (sent_html, received_html)

    text_box.value = ""
    img_upload.value.clear()
    pdf_upload.value.clear()
    return entry


def send_from_a(_):
    """Click handler for User A's Send button; the translation targets User B's language."""
    global history_a, history_b
    entry = _compose_chat_entry("A", input_a, upload_a_img, upload_a_pdf, lang_b.value)
    if entry is None:
        return
    sent_html, received_html = entry
    history_a += sent_html
    history_b += received_html
    chat_a.value = f"<div class='chat-box'>{history_a}</div>"
    chat_b.value = f"<div class='chat-box'>{history_b}</div>"


def send_from_b(_):
    """Click handler for User B's Send button; the translation targets User A's language."""
    global history_a, history_b
    entry = _compose_chat_entry("B", input_b, upload_b_img, upload_b_pdf, lang_a.value)
    if entry is None:
        return
    sent_html, received_html = entry
    history_b += sent_html
    history_a += received_html
    chat_b.value = f"<div class='chat-box'>{history_b}</div>"
    chat_a.value = f"<div class='chat-box'>{history_a}</div>"


send_a.on_click(send_from_a)
send_b.on_click(send_from_b)

# ======================
# Final Layout
# ======================
def _build_panel(title, chat_view, text_box, controls):
    """Stack one user's heading, chat history, text box and control row vertically."""
    control_row = widgets.HBox(
        controls,
        layout=widgets.Layout(justify_content='space-between', width='95%'),
    )
    return widgets.VBox(
        [widgets.HTML(title), chat_view, text_box, control_row],
        layout=widgets.Layout(width='100%'),
    )


panel_a = _build_panel("<b>User A</b>", chat_a, input_a, [upload_a_img, upload_a_pdf, lang_a, send_a])
panel_b = _build_panel("<b>User B</b>", chat_b, input_b, [upload_b_img, upload_b_pdf, lang_b, send_b])

# Two equal-width columns, one per user.
layout = widgets.HBox(
    [widgets.VBox([panel], layout=widgets.Layout(width='50%')) for panel in (panel_a, panel_b)],
    layout=widgets.Layout(width='100%'),
)

display(layout)


[32mCreating model: ('PP-LCNet_x1_0_doc_ori', None)[0m
[32mUsing official model (PP-LCNet_x1_0_doc_ori), the model files will be automatically downloaded and saved in /root/.paddlex/official_models.[0m


Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

[32mCreating model: ('UVDoc', None)[0m
[32mUsing official model (UVDoc), the model files will be automatically downloaded and saved in /root/.paddlex/official_models.[0m


Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

[32mCreating model: ('PP-LCNet_x1_0_textline_ori', None)[0m
[32mUsing official model (PP-LCNet_x1_0_textline_ori), the model files will be automatically downloaded and saved in /root/.paddlex/official_models.[0m


Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

[32mCreating model: ('PP-OCRv3_mobile_det', None)[0m
[32mUsing official model (PP-OCRv3_mobile_det), the model files will be automatically downloaded and saved in /root/.paddlex/official_models.[0m


Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

[32mCreating model: ('arabic_PP-OCRv3_mobile_rec', None)[0m
[32mUsing official model (arabic_PP-OCRv3_mobile_rec), the model files will be automatically downloaded and saved in /root/.paddlex/official_models.[0m


Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

Some weights of VisionEncoderDecoderModel were not initialized from the model checkpoint at microsoft/trocr-base-printed and are newly initialized: ['encoder.pooler.dense.bias', 'encoder.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(VBox(children=(VBox(children=(HTML(value='<b>User A</b>'), HTML(value=''), Textarea(value='', l…