<a href="https://colab.research.google.com/github/RohanGforwork/ML_text_summarization/blob/main/Translation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

!pip install googletrans==4.0.0-rc1 better_profanity nltk
!pip install wordfilter



Language translation


In [11]:
import nltk
from googletrans import Translator
from better_profanity import profanity
from wordfilter import Wordfilter
from nltk.corpus import wordnet

# Fetch the WordNet corpus up front; the slang check below looks words up in it.
nltk.download('wordnet')

# Extra Indian-slang terms registered with better_profanity on top of the
# words it already knows about.
CUSTOM_PROFANITY = [
    "chutiya",
    "bhosdike",
    "gaandu",
    "madarchod",
    "behnchod",
    "harami",
    "randi",
    "lund",
    "chodu",
    "kamina",
    "kutti",
]
profanity.add_censor_words(CUSTOM_PROFANITY)

# Shared blacklist filter instance used by the slang/profanity check.
word_filter = Wordfilter()

class LanguageTranslator:
    """
    Detects the language of a text, translates it to English and flags
    slang/profanity.

    Uses a ``googletrans.Translator`` for detection/translation and the
    module-level ``profanity``, ``word_filter`` and ``wordnet`` objects for
    the slang/profanity check.
    """

    # WordNet only indexes nouns, verbs, adjectives and adverbs, so closed-class
    # words (articles, pronouns, prepositions, conjunctions, modals) have no
    # synsets. They are skipped by the dictionary heuristic; otherwise almost
    # every ordinary sentence would be reported as slang. The last entries are
    # the fragments left over when contractions ("we're", "I've", "she'll")
    # are split on the apostrophe.
    _FUNCTION_WORDS = frozenset({
        "a", "an", "the",
        "i", "me", "my", "mine", "myself",
        "we", "us", "our", "ours", "ourselves",
        "you", "your", "yours", "yourself", "yourselves",
        "he", "him", "his", "himself",
        "she", "her", "hers", "herself",
        "it", "its", "itself",
        "they", "them", "their", "theirs", "themselves",
        "this", "that", "these", "those",
        "who", "whom", "whose", "which", "what",
        "how", "why", "when", "where",
        "and", "or", "but", "nor", "if", "because", "although", "though",
        "while", "unless", "until", "since", "whether", "than",
        "of", "to", "from", "for", "with", "without", "into", "onto", "upon",
        "at", "by", "about", "against", "between", "among", "through",
        "during", "before", "after",
        "would", "could", "should", "shall", "may", "might", "must",
        "can", "will",
        "someone", "anyone", "everyone", "something", "anything",
        "everything", "nothing",
        "re", "ve", "ll",
    })

    def __init__(self):
        self.translator = Translator()

    @staticmethod
    def _tokenize(text):
        """
        Splits text into lowercase alphabetic words.
        :param text: Input text.
        :return: List of words with punctuation, digits and symbols removed.
        """
        # Every non-letter becomes a separator so that "dope!" is looked up as
        # "dope" and "don't" as "don" + "t".
        letters_only = "".join(ch if ch.isalpha() else " " for ch in text.lower())
        return letters_only.split()

    def is_slang_or_profanity(self, text: str, check_dictionary: bool = True) -> bool:
        """
        Detects slang or profanity in a text.

        The profanity and blacklist word lists are applied to the whole text.
        When ``check_dictionary`` is true, any remaining word that has no
        WordNet entry is treated as slang. WordNet is an English dictionary,
        so this heuristic is only meaningful for English text and will also
        flag names and misspellings.

        :param text: Input text.
        :param check_dictionary: Whether to apply the WordNet heuristic
            (disable for non-English text).
        :return: Boolean (True if slang/profanity detected, False otherwise)
        """
        if not text or not text.strip():
            return False  # Nothing to inspect

        if profanity.contains_profanity(text):
            return True  # Flagged by the profanity list (incl. custom words)
        if word_filter.blacklisted(text):
            return True  # Contains a blacklisted word

        if not check_dictionary:
            return False

        # No dictionary meaning for a content word -> assume slang
        return any(
            not wordnet.synsets(word)
            for word in self._tokenize(text)
            if word not in self._FUNCTION_WORDS
        )

    def detect_and_translate(self, text: str) -> dict:
        """
        Detects language, translates to English, and flags slang/profanity.
        :param text: User input.
        :return: Dictionary with detected language, confidence, original text,
            translation, and slang detection. ``confidence`` is a float rounded
            to two decimals, or the string "Unknown" when the detector reports
            no score.
        """
        detected = self.translator.detect(text)

        # Only a missing score becomes "Unknown"; a genuine 0.0 is kept.
        confidence = detected.confidence
        confidence_score = round(confidence, 2) if confidence is not None else "Unknown"

        is_english = detected.lang == 'en'
        translated_text = text  # Default to original text

        if not is_english:
            translated = self.translator.translate(text, dest='en')
            translated_text = translated.text  # Standard translation

        # The dictionary heuristic is English-only, so it runs on the English
        # version of the text. The word lists (which include non-English slang)
        # are additionally applied to the original wording.
        flagged = self.is_slang_or_profanity(translated_text)
        if not flagged and not is_english:
            flagged = self.is_slang_or_profanity(text, check_dictionary=False)

        response = {
            "detected_language": detected.lang,
            "confidence": confidence_score,
            "original_text": text,
            "translated_text": translated_text,
            "contains_slang_or_profanity": flagged
        }

        return response

    def process_for_summarizer(self, text: str) -> str:
        """
        Translates text to English before sending it to the summarizer.
        Also prints the translated text.
        :param text: Input text.
        :return: Translated text (ready for summarization).
        """
        result = self.detect_and_translate(text)
        print(f"Processed Text for Summarization: {result['translated_text']}")
        return result['translated_text']  # Return translated text only

# Example usage (for testing): reads one line of text interactively, runs it
# through the translator, and prints the text that would go to the summarizer.
if __name__ == "__main__":
    translator = LanguageTranslator()
    sample_text = input("Enter text: ")
    # process_for_summarizer() prints the translated text itself, so the
    # translation appears twice in the output (once there, once below).
    translated_text = translator.process_for_summarizer(sample_text)
    print(f"Final Output (For Summarization): {translated_text}")


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Enter text: "madarchod kya kar raha hai
Processed Text for Summarization: "What is Madarachod doing
Final Output (For Summarization): "What is Madarachod doing


In [None]:
"""
Integration part for front end



from translator import LanguageTranslator

translator = LanguageTranslator()

user_message = "Bruh, this party was dope!"
translation_result = translator.detect_and_translate(user_message)

print(f"Detected: {translation_result['detected_language']} (Confidence: {translation_result['confidence']})")
print(f"Translated: {translation_result['translated_text']}")
print(f"Slang Detected? {translation_result['contains_slang_or_profanity']}")
"""
