<a href="https://colab.research.google.com/github/RohanGforwork/ML_text_summarization/blob/main/Translation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:

!pip install googletrans==4.0.0-rc1 better_profanity nltk



Collecting googletrans==4.0.0-rc1
  Downloading googletrans-4.0.0rc1.tar.gz (20 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting better_profanity
  Downloading better_profanity-0.7.0-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx==0.13.3 (from googletrans==4.0.0-rc1)
  Downloading httpx-0.13.3-py3-none-any.whl.metadata (25 kB)
Collecting hstspreload (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading hstspreload-2025.1.1-py3-none-any.whl.metadata (2.1 kB)
Collecting chardet==3.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading chardet-3.0.4-py2.py3-none-any.whl.metadata (3.2 kB)
Collecting idna==2.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading idna-2.10-py2.py3-none-any.whl.metadata (9.1 kB)
Collecting rfc3986<2,>=1.3 (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading rfc3986-1.5.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting httpcore==0.9.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading httpcore-0.9.1-py3-

Language translation


In [4]:
import nltk
from googletrans import Translator
from better_profanity import profanity
from nltk.corpus import wordnet

# Download the WordNet corpus: `wordnet.synsets`, which
# `LanguageTranslator.is_slang` calls for every word, reads from it.
# NOTE: this runs every time the cell (or the module, if imported) executes.
nltk.download('wordnet')

class LanguageTranslator:
    """
    Detects the language of a text, translates it to English, and flags slang.

    Detection and translation are delegated to a ``googletrans.Translator``;
    slang detection is a heuristic built on WordNet lookups and
    ``better_profanity``.
    """

    # Placeholder used when the translation service reports no value.
    _UNKNOWN = "Unknown"

    # WordNet only indexes nouns, verbs, adjectives and adverbs, so ordinary
    # function words have no synsets. They are skipped by `is_slang` so that
    # plain English sentences are not reported as slang.
    _FUNCTION_WORDS = frozenset({
        "a", "an", "the",
        "and", "or", "but", "nor", "so", "yet", "if", "then", "than",
        "because", "while", "although", "though", "unless", "until", "since",
        "as", "of", "to", "from", "for", "with", "without", "into", "onto",
        "upon", "at", "by", "in", "on", "off", "over", "under", "about",
        "between", "through", "during", "against", "among",
        "this", "that", "these", "those",
        "i", "me", "my", "mine", "myself",
        "you", "your", "yours", "yourself",
        "he", "him", "his", "himself",
        "she", "her", "hers", "herself",
        "it", "its", "itself",
        "we", "us", "our", "ours", "ourselves",
        "they", "them", "their", "theirs", "themselves",
        "who", "whom", "whose", "which", "what", "when", "where", "why", "how",
        "is", "am", "are", "was", "were", "be", "been", "being",
        "do", "does", "did", "have", "has", "had",
        "will", "would", "shall", "should", "can", "could", "may", "might",
        "must", "not", "no", "there", "here", "also", "too", "very",
    })

    def __init__(self):
        self.translator = Translator()

    @staticmethod
    def _normalize_token(token):
        """
        Strips leading/trailing non-alphanumeric characters and lowercases.
        :param token: A whitespace-delimited token, e.g. ``'"dope!"'``.
        :return: The bare lowercase word (``'dope'``), or ``''`` if nothing is left.
        """
        start, end = 0, len(token)
        while start < end and not token[start].isalnum():
            start += 1
        while end > start and not token[end - 1].isalnum():
            end -= 1
        return token[start:end].lower()

    def is_slang(self, text):
        """
        Checks if the given (English) text contains slang or informal words.

        This is a heuristic: a word counts as slang when it is flagged by
        ``better_profanity`` or has no WordNet entry. Surrounding punctuation
        is stripped first, and function words and purely numeric tokens are
        ignored because WordNet does not list them. Words WordNet simply does
        not know (e.g. proper nouns, contractions) may still be flagged.

        :param text: The input text.
        :return: Boolean (True if slang is detected, False otherwise)
        """
        if not text:
            return False

        for raw_token in text.split():
            word = self._normalize_token(raw_token)
            # Nothing left after stripping punctuation, or a number/symbol run.
            if not word or not any(ch.isalpha() for ch in word):
                continue
            if word in self._FUNCTION_WORDS:
                continue
            if profanity.contains_profanity(word):
                return True  # If flagged as profane/slang, return True
            if not wordnet.synsets(word):
                return True  # If no dictionary meaning, assume slang
        return False

    def detect_and_translate(self, text):
        """
        Detects the language, translates to English if needed, and checks for slang.

        The slang check runs on the English translation, because both WordNet
        and the profanity list are English resources; running it on the
        original text would flag almost any non-English input.

        :param text: User input text.
        :return: Dictionary with keys ``detected_language``, ``confidence``
                 (a float rounded to 2 places, or ``"Unknown"`` when the
                 service reports none), ``original_text``, ``translated_text``
                 and ``contains_slang``.
        """
        # Nothing to detect in blank input; skip the translation service entirely.
        if not text or not text.strip():
            return {
                "detected_language": self._UNKNOWN,
                "confidence": self._UNKNOWN,
                "original_text": text,
                "translated_text": text,
                "contains_slang": False
            }

        detected = self.translator.detect(text)

        # Only a missing (None) confidence is "Unknown"; 0.0 is a real score.
        raw_confidence = detected.confidence
        if raw_confidence is None:
            confidence_score = self._UNKNOWN
        else:
            confidence_score = round(raw_confidence, 2)

        translated_text = text
        if detected.lang != 'en':
            translated_text = self.translator.translate(text, dest='en').text

        return {
            "detected_language": detected.lang,
            "confidence": confidence_score,
            "original_text": text,
            "translated_text": translated_text,
            "contains_slang": self.is_slang(translated_text)
        }

    def process_for_summarizer(self, text):
        """
        Translates the text to English before sending it to the summarizer.
        Also prints the translated text as a progress message.
        :param text: Input text.
        :return: Translated text (ready for summarization).
        """
        result = self.detect_and_translate(text)
        print(f"Processed Text for Summarization: {result['translated_text']}")
        return result['translated_text']  # Return translated text only

# Example usage (for testing): reads one line of text from the user, runs it
# through LanguageTranslator, and prints the English text.
if __name__ == "__main__":
    translator = LanguageTranslator()
    # Blocks until the user types something, so this cell needs interaction to finish.
    sample_text = input("Enter text: ")
    # process_for_summarizer prints the processed text itself and also returns it,
    # which is why the text shows up twice in the output.
    translated_text = translator.process_for_summarizer(sample_text)
    print(f"Final Output (For Summarization): {translated_text}")


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Enter text: "El director la cagó"
Processed Text for Summarization: "The director screwed up"
Final Output (For Summarization): "The director screwed up"


In [None]:
"""
Integration part for front end



from translator import LanguageTranslator

translator = LanguageTranslator()

user_message = "Bruh, this party was dope!"
translation_result = translator.detect_and_translate(user_message)

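confidence = translation_result['confidence']
# 'confidence' is a float, or the string "Unknown" when the service reports none
confidence_label = f"{confidence * 100}%" if isinstance(confidence, (int, float)) else confidence
print(f"Detected: {translation_result['detected_language']} (Confidence: {confidence_label})")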
print(f"Translated: {translation_result['translated_text']}")
print(f"Slang Detected? {translation_result['contains_slang']}")
"""
