<a href="https://colab.research.google.com/github/HariKarthick22/cse22/blob/main/Copy_of_Welcome_to_Colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers



In [11]:
from IPython.display import display, clear_output
import ipywidgets as widgets

from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load models
sentiment_analyzer = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")
sarcasm_tokenizer = T5Tokenizer.from_pretrained("mrm8488/t5-base-finetuned-sarcasm-twitter")
sarcasm_model = T5ForConditionalGeneration.from_pretrained("mrm8488/t5-base-finetuned-sarcasm-twitter")

CATEGORY_KEYWORDS = {
    "Politics": ["election", "president", "vote", "government", "minister", "campaign", "policy"],
    "Finance": ["market", "stocks", "money", "economy", "investment", "dollar", "inflation", "bank"],
    "Technology": ["ai", "software", "robot", "tech", "digital", "device", "startup", "innovation"],
    "Health": ["vaccine", "covid", "virus", "disease", "hospital", "treatment", "healthcare"],
    "Entertainment": ["movie", "music", "actor", "film", "award", "celebrity", "tv", "show"],
    "Sports": ["cricket", "football", "score", "goal", "match", "tournament", "player", "win"]
}

def get_category(headline):
    headline = headline.lower()
    scores = {cat: 0 for cat in CATEGORY_KEYWORDS}
    for cat, keywords in CATEGORY_KEYWORDS.items():
        for keyword in keywords:
            if keyword in headline:
                scores[cat] += 1
    max_score = max(scores.values())
    return max(scores.items(), key=lambda x: x[1])[0] if max_score > 0 else "General"

def detect_sarcasm(text):
    input_text = f"sarcasm: {text}"
    inputs = sarcasm_tokenizer.encode(input_text, return_tensors="pt", max_length=128, truncation=True)
    with torch.no_grad():
        outputs = sarcasm_model.generate(inputs, max_length=2)
    prediction = sarcasm_tokenizer.decode(outputs[0], skip_special_tokens=True)
    return "Sarcastic" if prediction.lower() == "sarcasm" else "Not Sarcastic"

def analyze_headlines(headlines):
    results = []
    batch_size = 8
    for i in range(0, len(headlines), batch_size):
        batch = headlines[i:i + batch_size]
        sentiment_results = sentiment_analyzer(batch)

        for j, result in enumerate(sentiment_results):
            headline = batch[j]
            label = result['label']
            sentiment = {
                "LABEL_0": "Negative",
                "LABEL_1": "Neutral",
                "LABEL_2": "Positive"
            }.get(label, "Unknown")
            confidence = round(result['score'] * 100, 1)
            polarity = f"{sentiment} ({confidence}%)"
            sarcasm = detect_sarcasm(headline)
            negation = "not" in headline.lower() or "n't" in headline.lower()
            category = get_category(headline)

            results.append({
                "headline": headline,
                "sentiment": sentiment,
                "confidence": confidence,
                "polarity": polarity,
                "sarcasm": sarcasm,
                "negation": negation,
                "category": category
            })
    return results

def visualize_results(results_df):
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
    sns.countplot(x='sentiment', data=results_df, ax=ax1, palette='Set2')
    ax1.set_title('Sentiment Distribution')
    ax1.set_ylabel('Count')

    category_counts = results_df['category'].value_counts()
    category_counts.plot.pie(autopct='%1.1f%%', ax=ax2, colors=sns.color_palette("Set3", len(category_counts)))
    ax2.set_title('Category Distribution')
    ax2.set_ylabel('')
    plt.tight_layout()
    plt.show()

# --- Interactive Interface ---
text_input = widgets.Textarea(
    value='Paste or type headlines here.\nOne headline per line.',
    placeholder='Enter headlines...',
    description='Headlines:',
    layout=widgets.Layout(width='100%', height='200px'),
    style={'description_width': 'initial'}
)

button = widgets.Button(description="Analyze Headlines", button_style='success')
output_box = widgets.Output()

def on_button_click(b):
    with output_box:
        clear_output()
        lines = text_input.value.strip().split('\n')
        headlines = [line.strip() for line in lines if line.strip()]

        if len(headlines) < 1:
            print("Please enter at least one headline.")
            return

        print("🔍 Analyzing...")
        output = analyze_headlines(headlines)
        results_df = pd.DataFrame(output)

        for i, item in enumerate(output):
            print(f"\nHeadline {i+1}: {item['headline']}")
            print(f"  Sentiment   : {item['sentiment']} (Confidence: {item['confidence']}%)")
            print(f"  Polarity    : {item['polarity']}")
            print(f"  Sarcasm     : {item['sarcasm']}")
            print(f"  Negation    : {'Yes' if item['negation'] else 'No'}")
            print(f"  Category    : {item['category']}")

        print("\n📈 SUMMARY 📈")
        print(f"Total Headlines: {len(results_df)}")
        print(f"Sentiment Counts:\n{results_df['sentiment'].value_counts()}")
        print(f"Top Category: {results_df['category'].value_counts().idxmax()}")
        visualize_results(results_df)

button.on_click(on_button_click)

display(text_input, button, output_box)


Device set to use cpu


Textarea(value='Paste or type headlines here.\nOne headline per line.', description='Headlines:', layout=Layou…

Button(button_style='success', description='Analyze Headlines', style=ButtonStyle())

Output()