<a href="https://colab.research.google.com/github/RohanGforwork/ML_text_summarization/blob/main/Translation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
#Necessary Modules!!!
!pip install googletrans==4.0.0-rc1 better_profanity nltk
!pip install wordfilter



Collecting googletrans==4.0.0-rc1
  Downloading googletrans-4.0.0rc1.tar.gz (20 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting better_profanity
  Downloading better_profanity-0.7.0-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx==0.13.3 (from googletrans==4.0.0-rc1)
  Downloading httpx-0.13.3-py3-none-any.whl.metadata (25 kB)
Collecting hstspreload (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading hstspreload-2025.1.1-py3-none-any.whl.metadata (2.1 kB)
Collecting chardet==3.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading chardet-3.0.4-py2.py3-none-any.whl.metadata (3.2 kB)
Collecting idna==2.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading idna-2.10-py2.py3-none-any.whl.metadata (9.1 kB)
Collecting rfc3986<2,>=1.3 (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading rfc3986-1.5.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting httpcore==0.9.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading httpcore-0.9.1-py3-

Collecting wordfilter
  Downloading wordfilter-0.2.7-py3-none-any.whl.metadata (3.2 kB)
Downloading wordfilter-0.2.7-py3-none-any.whl (4.4 kB)
[31mERROR: Operation cancelled by user[0m[31m
[0m^C


Language translation


In [7]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from googletrans import Translator
from scipy.special import softmax

# Load model and tokenizer globally to avoid reloading on each call
device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment")
model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment").to(device)
model.eval()
translator = Translator()

LABELS = ["Negative", "Neutral", "Positive"]

def detect_language(text):
    """
    Detects the language of the input text.
    :param text: Input text.
    :return: Detected language and confidence score.
    """
    detected = translator.detect(text)
    return detected.lang if detected else "unknown", round(detected.confidence, 2) if detected and detected.confidence else 1.0

def translate_text(text, target_lang="en"):
    """
    Translates the text to English if it's not already in English.
    :param text: Input text.
    :param target_lang: Target language (default: English).
    :return: Translated text.
    """
    detected_lang, _ = detect_language(text)
    return translator.translate(text, dest=target_lang).text if detected_lang != target_lang else text

def analyze_sentiment(text):
    """
    Analyzes sentiment of the translated text.
    :param text: English text for sentiment analysis.
    :return: Sentiment label, confidence, and topic-based inference.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512).to(device)
    with torch.no_grad():
        outputs = model(**inputs)
        scores = softmax(outputs.logits.cpu().numpy()[0])

    sentiment = LABELS[scores.argmax()]
    confidence = round(scores.max(), 2)

    # Extract topic from the text
    topic = text.split('.')[0] if '.' in text else text[:50]

    # Generate inference with topic reference
    if sentiment == "Positive":
        inference = f"The discussion on '{topic}' is optimistic, suggesting support and approval."
    elif sentiment == "Negative":
        inference = f"The discussion on '{topic}' is critical, indicating concerns or disapproval."
    else:
        inference = f"The sentiment around '{topic}' is mixed, with both positive and negative perspectives present."

    return {
        "sentiment": sentiment,
        "confidence": confidence,
        "inference": inference,
        "scores": {LABELS[i]: round(float(scores[i]), 2) for i in range(len(scores))}
    }

def process_text(text):
    """
    Full pipeline: Detect language, translate (if needed), and analyze sentiment.
    :param text: User input text.
    :return: Structured output containing all analysis results.
    """
    detected_lang, lang_confidence = detect_language(text)
    translated_text = translate_text(text)
    sentiment_result = analyze_sentiment(translated_text)

    return {
        "Detected Language": detected_lang,
        "Language Confidence": lang_confidence,
        "Translated Text": translated_text,
        "Sentiment": sentiment_result["sentiment"],
        "Sentiment Confidence": sentiment_result["confidence"],
        "Inference": sentiment_result["inference"]
    }

# Example Usage
if __name__ == "__main__":
    sample_text = input("Enter text: ")
    result = process_text(sample_text)

    print(f"Detected Language: {result['Detected Language']} (Confidence: {result['Language Confidence']})")
    print(f"Translated Text: {result['Translated Text']}")
    print(f"Sentiment: {result['Sentiment']} (Confidence: {result['Sentiment Confidence']})")
    print(f"Inference: {result['Inference']}")


Enter text: ಆಡಳಿತ ಪಕ್ಷದ ಇತ್ತೀಚಿನ ನೀತಿಯಲ್ಲಿ ಬದಲಾವಣೆಗಳು ಚರ್ಚೆಗೆ ಕಾರಣವಾಗಿವೆ. ಬೆಂಬಲಿಗರು ಇದರಿಂದ ಆರ್ಥಿಕ ವೃದ್ಧಿ ಹೆಚ್ಚುತ್ತದೆ ಎಂದು ಹೇಳುತ್ತಾರೆ, ಆದರೆ ಟೀಕಾಕಾರರು ಅಸಮಾನತೆ ಹೆಚ್ಚುತ್ತದೆ ಎಂದು ವಾದಿಸುತ್ತಾರೆ. ಪ್ರಮುಖ ನಗರಗಳಲ್ಲಿ ಪ್ರತಿಭಟನೆಗಳು ಯೋಜಿತವಾಗಿವೆ."
Detected Language: kn (Confidence: 1.0)
Translated Text: Changes in the recent policy of the ruling party have led to debate.Supporters say that this will increase economic growth, but critics argue that inequality will increase.Protests are planned in major cities. ”
Sentiment: Neutral (Confidence: 0.5899999737739563)
Inference: The sentiment around 'Changes in the recent policy of the ruling party have led to debate' is mixed, with both positive and negative perspectives present.


Inference drawer!!!!!
