<a href="https://colab.research.google.com/github/RohanGforwork/ML_text_summarization/blob/main/Translation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
#Necessary Modules!!!
!pip install googletrans==4.0.0-rc1 better_profanity nltk
!pip install wordfilter



Collecting googletrans==4.0.0-rc1
  Downloading googletrans-4.0.0rc1.tar.gz (20 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting better_profanity
  Downloading better_profanity-0.7.0-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx==0.13.3 (from googletrans==4.0.0-rc1)
  Downloading httpx-0.13.3-py3-none-any.whl.metadata (25 kB)
Collecting hstspreload (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading hstspreload-2025.1.1-py3-none-any.whl.metadata (2.1 kB)
Collecting chardet==3.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading chardet-3.0.4-py2.py3-none-any.whl.metadata (3.2 kB)
Collecting idna==2.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading idna-2.10-py2.py3-none-any.whl.metadata (9.1 kB)
Collecting rfc3986<2,>=1.3 (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading rfc3986-1.5.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting httpcore==0.9.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading httpcore-0.9.1-py3-

Collecting wordfilter
  Downloading wordfilter-0.2.7-py3-none-any.whl.metadata (3.2 kB)
Downloading wordfilter-0.2.7-py3-none-any.whl (4.4 kB)
[31mERROR: Operation cancelled by user[0m[31m
[0m^C


Language translation


In [4]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from googletrans import Translator
from scipy.special import softmax
import re

class TextProcessor:
    def __init__(self, model_name="cardiffnlp/twitter-roberta-base-sentiment"):
        """
        Initializes the translator and sentiment analyzer.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name).to(self.device)
        self.model.eval()
        self.translator = Translator()
        self.labels = ["Negative", "Neutral", "Positive"]

    def detect_and_translate(self, text):
        """
        Detects language and translates text to English if necessary.
        """
        detected = self.translator.detect(text)
        detected_lang = detected.lang
        confidence = round(detected.confidence, 2) if detected.confidence else 1.0

        if detected_lang != "en":
            translated_text = self.translator.translate(text, dest="en").text
        else:
            translated_text = text

        return {
            "detected_language": detected_lang,
            "confidence": confidence,
            "translated_text": translated_text
        }

    def extract_topic(self, text):
        """
        Extracts the topic from the given text using simple heuristics.
        """
        sentences = text.split(". ")
        for sentence in sentences:
            words = sentence.split()
            if len(words) > 3:
                return " ".join(words[:4])  # Extract first few words as a topic approximation
        return "General Topic"

    def analyze_sentiment(self, text):
        """
        Analyzes sentiment of the translated text.
        """
        inputs = self.tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512).to(self.device)
        with torch.no_grad():
            outputs = self.model(**inputs)
            scores = outputs.logits.cpu().numpy()[0]
            scores = softmax(scores)

        sentiment = self.labels[scores.argmax()]
        confidence = round(scores.max(), 2)
        topic = self.extract_topic(text)

        if sentiment == "Positive":
            inference = f"The sentiment around '{topic}' is positive, suggesting a favorable view."
        elif sentiment == "Negative":
            inference = f"The sentiment around '{topic}' is negative, indicating concerns or criticisms."
        else:
            inference = f"The sentiment around '{topic}' is mixed, with both positive and negative perspectives present."

        return {
            "sentiment": sentiment,
            "confidence": confidence,
            "inference": inference,
            "scores": {self.labels[i]: round(float(scores[i]), 2) for i in range(len(scores))}
        }

# Example usage (for testing)
if __name__ == "__main__":
    processor = TextProcessor()
    sample_text = input("Enter text: ")
    translation_result = processor.detect_and_translate(sample_text)
    sentiment_result = processor.analyze_sentiment(translation_result["translated_text"])

    print(f"Detected Language: {translation_result['detected_language']} (Confidence: {translation_result['confidence']})")
    print(f"Translated Text: {translation_result['translated_text']}")
    print(f"Sentiment: {sentiment_result['sentiment']} (Confidence: {sentiment_result['confidence']})")
    print(f"Inference: {sentiment_result['inference']}")



Enter text: "ಆಡಳಿತ ಪಕ್ಷದ ಇತ್ತೀಚಿನ ನೀತಿಯಲ್ಲಿ ಬದಲಾವಣೆಗಳು ಚರ್ಚೆಗೆ ಕಾರಣವಾಗಿವೆ. ಬೆಂಬಲಿಗರು ಇದರಿಂದ ಆರ್ಥಿಕ ವೃದ್ಧಿ ಹೆಚ್ಚುತ್ತದೆ ಎಂದು ಹೇಳುತ್ತಾರೆ, ಆದರೆ ಟೀಕಾಕಾರರು ಅಸಮಾನತೆ ಹೆಚ್ಚುತ್ತದೆ ಎಂದು ವಾದಿಸುತ್ತಾರೆ. ಪ್ರಮುಖ ನಗರಗಳಲ್ಲಿ ಪ್ರತಿಭಟನೆಗಳು ಯೋಜಿತವಾಗಿವೆ."
Detected Language: kn (Confidence: 1.0)
Translated Text: "Changes in the recent policy of the ruling party have led to debate. Supporters say that this will increase economic growth, but critics argue that inequality is increasing. Protests are planned in major cities."
Sentiment: Neutral (Confidence: 0.5799999833106995)
Inference: The sentiment around '"Changes in the recent' is mixed, with both positive and negative perspectives present.


Inference drawer!!!!!
