# Package Installation

In [2]:
!pip install deep-translator
!pip install gtts
!pip install playsound


Collecting deep-translator
  Downloading deep_translator-1.11.4-py3-none-any.whl.metadata (30 kB)
Downloading deep_translator-1.11.4-py3-none-any.whl (42 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.3/42.3 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: deep-translator
Successfully installed deep-translator-1.11.4
Collecting gtts
  Downloading gTTS-2.5.2-py3-none-any.whl.metadata (4.1 kB)
Downloading gTTS-2.5.2-py3-none-any.whl (29 kB)
Installing collected packages: gtts
Successfully installed gtts-2.5.2
Collecting playsound
  Downloading playsound-1.3.0.tar.gz (7.7 kB)
  Preparing metadata (setup.py) ... done
Building wheels for collected packages: playsound
  Building wheel for playsound (setup.py) ... done
  Created wheel for playsound: filename=playsound-1.3.0-py3-none-any.whl size=7020 sha256=cbb169b18af43ff82b5569ba56cfbab4aa8c7e95dcba422826d5b6e86712b6e7
  Stored in directory: /root/.cache/pip/whe

# Preprocessing Function

In [3]:
import re

#clean and format the text
def preprocess_text(text):
    """Clean raw document text before translation.

    Keeps word characters (letters, digits, underscore), whitespace, and the
    punctuation ``. , ! ?``; drops every other character; then collapses each
    run of whitespace (including newlines and tabs) to a single space and
    trims both ends.

    Unicode combining marks (general category ``M*``) are kept as well. The
    regex class ``\\w`` does not match them, so filtering with ``[^\\w\\s.,!?]``
    would delete vowel signs/viramas from scripts such as Tamil or Devanagari
    and corrupt the text.

    Args:
        text: The raw input string.

    Returns:
        The cleaned, single-spaced string (empty string for blank input).
    """
    import unicodedata  # local import: only needed by this function

    def _keep(ch):
        # Same characters the original pattern preserved, plus combining marks.
        return (
            ch.isalnum()
            or ch == '_'
            or ch.isspace()
            or ch in '.,!?'
            or unicodedata.category(ch).startswith('M')
        )

    # Strip unwanted characters FIRST: removing a symbol that sits between two
    # spaces ("a & b") would otherwise leave a double space behind.
    text = ''.join(ch for ch in text if _keep(ch))
    # \s+ already covers newlines, so one substitution normalises all whitespace.
    text = re.sub(r'\s+', ' ', text)
    return text.strip()


# Translation Function

In [4]:
from deep_translator import GoogleTranslator

# Translation function
def translate_text(text, source_language, target_language):
    """Translate ``text`` using ``GoogleTranslator``.

    Args:
        text: The text to translate.
        source_language: Passed to ``GoogleTranslator`` as ``source``.
        target_language: Passed to ``GoogleTranslator`` as ``target``.

    Returns:
        Whatever ``GoogleTranslator.translate`` returns for ``text``.
    """
    return GoogleTranslator(
        source=source_language,
        target=target_language,
    ).translate(text)


# Text-to-Speech (TTS) Function

In [5]:
from gtts import gTTS

# Text-to-Speech (TTS) function
def text_to_speech(text, language_code, output_path="output.mp3"):
    """Synthesize ``text`` with gTTS and save the audio to a file.

    Args:
        text: The text to speak.
        language_code: Passed to ``gTTS`` as ``lang``.
        output_path: Where to write the audio. Defaults to ``"output.mp3"``,
            the file name this function previously hard-coded, so existing
            callers are unaffected.

    Returns:
        None. The audio is written to ``output_path`` and a confirmation
        line is printed.
    """
    tts = gTTS(text=text, lang=language_code)
    tts.save(output_path)
    print(f"Voice-over audio saved as '{output_path}'")


# Audio Playback Function

In [6]:
from IPython.display import Audio, display

# Function to play the audio
def play_audio(file_path):
    """Show an ``IPython.display.Audio`` player for ``file_path``.

    The player is built with ``autoplay=True`` and handed to ``display``.
    """
    player = Audio(file_path, autoplay=True)
    display(player)


# Full Pipeline Function

In [7]:
# Full Pipeline function
def process_document(document, source_language, target_language):
    """Run the clean -> translate -> speak -> play pipeline on one document.

    Args:
        document: Raw text to process.
        source_language: Language code forwarded to ``translate_text``.
        target_language: Language code forwarded to ``translate_text`` and
            reused as the language code for ``text_to_speech``.

    Returns:
        None. The cleaned and translated texts are printed, and the audio
        file ``output.mp3`` is played.
    """
    # Stage 1: normalise the raw input and echo the result.
    cleaned = preprocess_text(document)
    print("Cleaned Text:", cleaned)

    # Stage 2: translate the normalised text and echo the result.
    translated = translate_text(cleaned, source_language, target_language)
    print("Translated Text:", translated)

    # Stage 3: synthesize speech for the translation in the target language.
    text_to_speech(translated, target_language)

    # Stage 4: play back the audio file by its fixed name.
    play_audio("output.mp3")


# User Input and Execution

In [8]:
# Prompt the user to enter the source language code
# Collect the pipeline inputs interactively. Language codes are stripped of
# surrounding whitespace; the document text is passed through untouched.
source_language = input(
    "Enter the source language code (e.g., 'en' for English, 'es' for Spanish): "
).strip()
document = input("Enter the document text: ")
target_language = input(
    "Enter the target language code (e.g., 'en' for English, 'ta' for Tamil): "
).strip()

# Clean, translate, synthesize and play the entered text.
process_document(document, source_language, target_language)


Enter the source language code (e.g., 'en' for English, 'es' for Spanish): en
Enter the document text: hello friends
Enter the target language code (e.g., 'en' for English, 'ta' for Tamil): ta
Cleaned Text: hello friends
Translated Text: வணக்கம் நண்பர்களே
Voice-over audio saved as 'output.mp3'


In [13]:
import pickle
from deep_translator import GoogleTranslator

# Function to save the translation model to a pickle file
def save_model(source=None, target=None, path='translator.pkl'):
    """Pickle a ``GoogleTranslator`` configured for a language pair.

    Args:
        source: Source language code. When ``None``, falls back to the
            notebook-level ``source_language`` variable (the previous,
            implicit behaviour).
        target: Target language code. When ``None``, falls back to the
            notebook-level ``target_language`` variable.
        path: File to write the pickle to. Defaults to ``'translator.pkl'``.

    Returns:
        None. The pickle is written to ``path`` and a confirmation line is
        printed.

    NOTE(review): ``GoogleTranslator`` appears to be a client for a web
    service, so this file presumably stores only the translator's
    configuration rather than a trained model - confirm before relying on it
    offline. Only unpickle files you created yourself: ``pickle.load`` can
    execute arbitrary code.
    """
    # Prefer explicit arguments; the globals are only a backward-compatible
    # fallback and require the user-input cell to have been run first.
    if source is None:
        source = source_language
    if target is None:
        target = target_language

    translator = GoogleTranslator(source=source, target=target)
    with open(path, 'wb') as f:
        pickle.dump(translator, f)
    print(f"Model saved as {path}")

save_model()


Model saved as translator.pkl
