In [None]:
# --- STEP 1: INSTALL NECESSARY LIBRARIES ---
# Only standard-library modules are imported up front. The third-party imports
# are placed AFTER the install step, because this cell is what installs several
# of them; importing them first can fail on a fresh runtime.
import importlib
import io
import os
import subprocess
import sys


def _run_install_command(command):
    """Run one installation command and report whether it succeeded.

    Args:
        command: Argument list handed to ``subprocess.run``.

    Returns:
        True when the command exits with status 0, otherwise False. On
        failure the command line and any captured stderr are printed so the
        problem is visible in the cell output.
    """
    try:
        completed = subprocess.run(command, capture_output=True, text=True)
    except OSError as e:
        # The executable itself could not be started (e.g. apt-get is missing).
        print(f"An error occurred during installation: {e}")
        return False
    if completed.returncode != 0:
        print(f"Command failed (exit status {completed.returncode}): {' '.join(command)}")
        error_text = completed.stderr.strip()
        if error_text:
            print(error_text)
        return False
    return True


print("Step 1: Installing required libraries...")
required_commands = [
    # Update package lists and install Tesseract OCR with language packs
    ["apt-get", "update", "-qq"],
    ["apt-get", "install", "-y", "-qq",
     "tesseract-ocr", "tesseract-ocr-hin", "tesseract-ocr-spa", "tesseract-ocr-fra"],
    # Install Python packages quietly into the interpreter running this kernel
    [sys.executable, "-m", "pip", "install", "-q",
     "pytesseract", "Pillow", "transformers", "torch", "gTTS"],
]
# Run every command even when an earlier one fails, so all problems are
# reported in a single pass instead of one per re-run.
install_results = [_run_install_command(command) for command in required_commands]

if all(install_results):
    print("Installation complete.\n")
else:
    print("Please check the output for specific installation failures.")

# py-braille is optional: the recorded run of this notebook shows pip finding
# no distribution with that name, and Braille output is produced by a manual
# converter later on. A failure here therefore must not count as an
# installation failure; it only leaves braille_module_available False below.
_run_install_command([sys.executable, "-m", "pip", "install", "-q", "py-braille"])

# Make sure packages installed a moment ago are visible to the import system.
importlib.invalidate_caches()

import pytesseract
from PIL import Image
from transformers import pipeline
from gtts import gTTS, lang
from google.colab import files

# Dynamically import py_braille after installation attempt
try:
    from py_braille import convert
    braille_module_available = True
    print("'py_braille' module successfully imported.")
except ImportError:
    braille_module_available = False
    print("Warning: 'py_braille' module not found. Braille conversion will be skipped.")

Step 1: Installing required libraries...
W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
[31mERROR: Could not find a version that satisfies the requirement py-braille (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for py-braille[0m[31m
[0mInstallation complete.



In [None]:
# --- STEP 2: SETUP MODELS AND LANGUAGES ---
print("\nStep 2: Setting up models and language options...")

# Summarization pipeline built from a pre-trained checkpoint.
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")

# Language code -> human-readable name. The codes are shown to the user in the
# language menu and are also used as the gTTS language code later on.
supported_languages = dict(
    hi='Hindi',
    fr='French',
    es='Spanish',
    de='German',
    it='Italian',
    ja='Japanese',
    ru='Russian',
)

# Language code -> Hugging Face model ID. Every ID follows the same
# "Helsinki-NLP/opus-mt-en-<code>" pattern, so the table is derived from the
# language table instead of being spelled out a second time.
# NOTE(review): confirm each derived ID (the 'ja' one in particular) actually
# exists on the Hugging Face Hub.
translation_models = {
    lang_code: f"Helsinki-NLP/opus-mt-en-{lang_code}"
    for lang_code in supported_languages
}
print("Setup complete.\n")


Step 2: Setting up models and language options...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

Device set to use cpu


Setup complete.



In [None]:
# --- STEP 3: UPLOAD IMAGE AND PERFORM OCR ---
print("Step 3: Uploading image and performing OCR...")
# Stays empty when nothing is uploaded or OCR fails; later steps check for that.
extracted_text = ""
try:
    print("Please upload an image file containing text.")
    uploaded = files.upload()

    if not uploaded:
        # Without this guard next(iter(...)) raises StopIteration, whose message
        # is empty, so the handler below would print an uninformative
        # "Error during OCR: " line.
        print("No file was uploaded. Skipping OCR.")
    else:
        # Only the first uploaded file is processed.
        image_path = next(iter(uploaded))
        print(f"File '{image_path}' uploaded successfully.")
        if len(uploaded) > 1:
            print(f"Note: {len(uploaded)} files were uploaded; only '{image_path}' will be processed.")

        # The upload result maps each file name to its content, so the image is
        # decoded from memory.
        image = Image.open(io.BytesIO(uploaded[image_path]))
        extracted_text = pytesseract.image_to_string(image)

        print("\n--- Extracted Text (English) ---\n")
        print(extracted_text)

except Exception as e:
    print(f"Error during OCR: {e}")

Step 3: Uploading image and performing OCR...
Please upload an image file containing text.


Saving Jay Shetty Wisdom Quotes _ Prayer for July! _ Facebook.jpeg to Jay Shetty Wisdom Quotes _ Prayer for July! _ Facebook.jpeg
File 'Jay Shetty Wisdom Quotes _ Prayer for July! _ Facebook.jpeg' uploaded successfully.

--- Extracted Text (English) ---

Begin July with this prayer

Dear God, As July begins, we place this
month in Your care. Fill our hearts with
peace, our minds with clarity, and our days
with purpose. Help us move forward with
courage, release what no longer serves us, and
trust Your timing. May we find joy in simple
moments and strength in every challenge. Let
July be a month of healing, growth, and quiet
blessings—for us and for the world. Amen.

Mindfulness



In [None]:
# --- STEP 4: SUMMARIZE THE EXTRACTED TEXT ---
print("\nStep 4: Summarizing the text...")

# Lower bounds for the generation limits. Without them a short text (just over
# 50 characters, only a handful of words) yields limits of 0-2, which is
# too small for the model to produce a usable summary.
MIN_SUMMARY_LENGTH_FLOOR = 5
MAX_SUMMARY_LENGTH_FLOOR = 20

# Default to the raw OCR text so later steps still have something to work with
# when summarization is skipped or fails.
summarized_text = extracted_text
if not extracted_text.strip():
    print("No text to summarize. Skipping...")
else:
    # Target roughly 10%-30% of the input's word count. The model counts these
    # limits in tokens rather than words, so the ratios are approximate.
    summary_length = len(extracted_text.split())
    min_sum_length = max(MIN_SUMMARY_LENGTH_FLOOR, int(summary_length * 0.1))
    max_sum_length = max(MAX_SUMMARY_LENGTH_FLOOR, int(summary_length * 0.3))

    try:
        if len(extracted_text.strip()) > 50:
            summary = summarizer(
                extracted_text,
                max_length=max_sum_length,
                min_length=min_sum_length,
                do_sample=False,
                # Cut over-long input down to the model's input limit instead of
                # failing on it.
                truncation=True,
            )
            summarized_text = summary[0]['summary_text']
            print("\n--- Summarized Text (English) ---\n")
            print(summarized_text)
        else:
            print("\n--- Text is too short to summarize. Using original text. ---\n")
            print(summarized_text)
    except Exception as e:
        # summarized_text keeps the original text in this case.
        print(f"Error during summarization: {e}")


Step 4: Summarizing the text...

--- Summarized Text (English) ---

 Begin July with this prayer: "Fill our hearts with peace, our minds with clarity, and our


In [None]:
# --- STEP 5: SELECT LANGUAGE AND TRANSLATE TO AUDIO ---
print("\nStep 5: Translating and generating audio...")

# Path of the generated MP3; remains None unless every stage below succeeds.
translated_audio_file = None

if summarized_text.strip():
    # Show the menu of language codes the user may pick from.
    print("\n--- Select a language for translation ---")
    for code, name in supported_languages.items():
        print(f"  - Enter '{code}' for {name}")

    language_choice = input("\nYour choice (e.g., hi, fr, es): ").strip().lower()

    if language_choice not in supported_languages:
        print("Invalid language choice. Skipping translation and audio generation.")
    else:
        target_lang_name = supported_languages[language_choice]
        model_name = translation_models[language_choice]

        try:
            # The translation model for the chosen language is loaded on demand.
            translator = pipeline("translation_en_to_xx", model=model_name)

            print(f"\n--- Translating to {target_lang_name}... ---")
            # Output cap is twice the input's character count.
            translation = translator(
                summarized_text,
                max_length=len(summarized_text)*2,
            )
            translated_text = translation[0]['translation_text']
            print(f"\n--- Translated Text ({target_lang_name}) ---\n")
            print(translated_text)

            # The menu code is reused as the text-to-speech language code.
            print(f"\n--- Generating {target_lang_name} audio file... ---")
            tts_audio = gTTS(translated_text, lang=language_choice, slow=False)
            audio_filename = f"summarized_{language_choice}.mp3"
            tts_audio.save(audio_filename)
            print(f"Audio file '{audio_filename}' saved.")

            # Recorded only after the file was written, for the download step.
            translated_audio_file = audio_filename

        except Exception as e:
            print(f"Error during translation or audio generation for {target_lang_name}: {e}")
else:
    print("No summarized text to translate. Skipping...")


Step 5: Translating and generating audio...

--- Select a language for translation ---
  - Enter 'hi' for Hindi
  - Enter 'fr' for French
  - Enter 'es' for Spanish
  - Enter 'de' for German
  - Enter 'it' for Italian
  - Enter 'ja' for Japanese
  - Enter 'ru' for Russian

Your choice (e.g., hi, fr, es): hi


config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/306M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/306M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/44.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/812k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/1.07M [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

Device set to use cpu



--- Translating to Hindi... ---

--- Translated Text (Hindi) ---

इस प्रार्थना के साथ जुलाई को यह प्रार्थना सुनाई दे, कि हमारे हृदय और मन शुद्ध और शुद्ध हो जाएं।

--- Generating Hindi audio file... ---
Audio file 'summarized_hi.mp3' saved.


In [None]:
# --- STEP 6: MANUAL BRAILLE CONVERSION ---
braille_filename = None

# Lookup table for a simplified English Braille transcription: letters, a few
# punctuation marks and five whole-word/part-word contractions.
# Fixes relative to the earlier table: the period is dots 2-5-6 (⠲) and the
# question mark is dots 2-3-6 (⠦); colon and semicolon were missing.
braille_dict = {
    'a': '⠁', 'b': '⠃', 'c': '⠉', 'd': '⠙', 'e': '⠑', 'f': '⠋',
    'g': '⠛', 'h': '⠓', 'i': '⠊', 'j': '⠚', 'k': '⠅', 'l': '⠇',
    'm': '⠍', 'n': '⠝', 'o': '⠕', 'p': '⠏', 'q': '⠟', 'r': '⠗',
    's': '⠎', 't': '⠞', 'u': '⠥', 'v': '⠧', 'w': '⠺', 'x': '⠭',
    'y': '⠽', 'z': '⠵', ' ': ' ', ',': '⠂', '.': '⠲', '!': '⠖',
    '?': '⠦', '-': '⠤', "'": '⠄', ':': '⠒', ';': '⠆',
    'and': '⠯', 'for': '⠿', 'of': '⠷', 'the': '⠮', 'with': '⠾',
}


def convert_to_braille(text):
    """Convert English text to a simplified Braille transcription.

    The text is lower-cased and split on whitespace. Within each word the
    multi-letter contractions from ``braille_dict`` are substituted first
    (wherever they occur in the word), then every remaining character is
    mapped through ``braille_dict``. Characters without an entry (digits,
    quotation marks, ...) are kept unchanged.

    Limitations: no capital or number indicators are emitted, and runs of
    whitespace (including line breaks) collapse to a single space.

    Args:
        text: The text to convert.

    Returns:
        The converted words joined by single spaces; an empty string when
        ``text`` contains no words.
    """
    # Multi-character keys are the contractions; single characters are cells.
    contractions = [
        (pattern, cell) for pattern, cell in braille_dict.items() if len(pattern) > 1
    ]

    converted_words = []
    for word in text.lower().split():
        # Contractions go first, while the word still consists of plain letters.
        for pattern, cell in contractions:
            word = word.replace(pattern, cell)
        # Cells inserted above have no table entry and therefore pass through.
        converted_words.append("".join(braille_dict.get(char, char) for char in word))
    return " ".join(converted_words)


if summarized_text.strip():
    print("\nStep 6: Manually converting to Braille...")

    manual_braille_text = convert_to_braille(summarized_text)

    print("\n--- Braille Text (Manual Conversion) ---\n")
    print(manual_braille_text)
    print("Braille text has been displayed above. A file will not be saved.")


Step 6: Manually converting to Braille...

--- Braille Text (Manual Conversion) ---

⠃⠑⠛⠊⠝ ⠚⠥⠇⠽ ⠾ ⠞⠓⠊⠎ ⠏⠗⠁⠽⠑⠗: "⠋⠊⠇⠇ ⠕⠥⠗ ⠓⠑⠁⠗⠞⠎ ⠾ ⠏⠑⠁⠉⠑⠂ ⠕⠥⠗ ⠍⠊⠝⠙⠎ ⠾ ⠉⠇⠁⠗⠊⠞⠽⠂ ⠯ ⠕⠥⠗
Braille text has been displayed above. A file will not be saved.


In [None]:
# --- STEP 7: PROVIDE DOWNLOAD LINKS ---
print("\nStep 7: Providing download links...")
print("\n--- Downloads ---")

# The audio path is looked up in the notebook namespace rather than referenced
# directly, so this cell also runs when the translation cell was never executed.
audio_to_download = globals().get('translated_audio_file')
if audio_to_download and os.path.exists(audio_to_download):
    files.download(audio_to_download)
    print(f"Download link for '{audio_to_download}' created.")

print("\nProcess complete.")


Step 7: Providing download links...

--- Downloads ---


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Download link for 'summarized_hi.mp3' created.

Process complete.
