# Sentiment Analysis and Emotion Detection

#### Installing Dependencies

In [1]:
!pip install pandas torch transformers nltk




#### Importing Libraries

In [2]:
from google.colab import files
import pandas as pd
import torch
import nltk
import re
import torch.nn.functional as F
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification

# Download nltk punkt for sentence tokenization
nltk.download("punkt_tab")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


#### Loading Models

In [3]:
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="cardiffnlp/twitter-roberta-base-sentiment-latest",
    tokenizer="cardiffnlp/twitter-roberta-base-sentiment-latest",
    device=0 if torch.cuda.is_available() else -1
)

emotion_model_name = "bhadresh-savani/bert-base-go-emotion"
emotion_tokenizer = AutoTokenizer.from_pretrained(emotion_model_name)
emotion_model = AutoModelForSequenceClassification.from_pretrained(emotion_model_name).to(device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/929 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model.safetensors:   0%|          | 0.00/501M [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Device set to use cpu


tokenizer_config.json:   0%|          | 0.00/333 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

#### Processing

In [None]:
# -------------------------------
# Upload CSV file in Colab
# -------------------------------
print("Please upload your CSV file with paragraphs (must have column named 'paragraph'):")
uploaded = files.upload()

# Assuming only one file uploaded, get its filename
filename = next(iter(uploaded.keys()))
print(f"Uploaded file: {filename}")

# Load CSV
df = pd.read_csv(filename)

# -------------------------------
# Sentiment and Emotion Setup
# -------------------------------
sentiment_labels = {
    "LABEL_0": "😡 Negative",
    "LABEL_1": "😐 Neutral",
    "LABEL_2": "😄 Positive"
}

# -------------------------------
# Helper functions
# -------------------------------
def predict_emotion(text):
    inputs = emotion_tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)
    with torch.no_grad():
        outputs = emotion_model(**inputs)
    probs = F.softmax(outputs.logits, dim=1)
    top_prob, top_class = torch.topk(probs, 1)
    label = emotion_model.config.id2label[top_class.item()]
    return label, top_prob.item()

def paragraph_to_phrases(paragraph):
    sentences = nltk.sent_tokenize(paragraph)
    phrases = []
    for sentence in sentences:
        parts = re.split(r',|;|\band\b|\bbut\b|\bor\b|\bso\b|\byet\b', sentence, flags=re.IGNORECASE)
        parts = [p.strip() for p in parts if p.strip()]
        phrases.extend(parts)
    return phrases

# -------------------------------
# Processing paragraphs
# -------------------------------
for i, paragraph in enumerate(df["paragraph"], start=1):
    paragraph = str(paragraph).strip()
    if not paragraph:
        continue

    print(f"\n📄 Paragraph {i}:\n{paragraph}")

    # Paragraph-level Sentiment
    para_sent = sentiment_pipeline(paragraph)[0]
    pretty_sent_label = sentiment_labels.get(para_sent["label"], para_sent["label"])

    # Paragraph-level Emotion
    para_emotion, para_emotion_conf = predict_emotion(paragraph)

    print(f"\n📊 Overall Paragraph Sentiment ➤ {pretty_sent_label} (Confidence: {para_sent['score']:.2f})")
    print(f"🎭 Overall Paragraph Emotion ➤ {para_emotion} (Confidence: {para_emotion_conf:.2f})\n")

    # Phrase-level Analysis
    phrases = paragraph_to_phrases(paragraph)
    for idx, phrase in enumerate(phrases, start=1):
        phrase_sent = sentiment_pipeline(phrase)[0]
        pretty_phrase_label = sentiment_labels.get(phrase_sent["label"], phrase_sent["label"])
        phrase_emotion, phrase_emotion_conf = predict_emotion(phrase)

        print(f"✂ Phrase {idx}: {phrase}")
        print(f"   ➤ Sentiment: {pretty_phrase_label} (Confidence: {phrase_sent['score']:.2f})")
        print(f"   ➤ Emotion:   {phrase_emotion} (Confidence: {phrase_emotion_conf:.2f})\n")

Please upload your CSV file with paragraphs (must have column named 'paragraph'):


Saving 1000paragraphs.csv to 1000paragraphs.csv
Uploaded file: 1000paragraphs.csv

📄 Paragraph 1:
Dr. Amelia Hart, a brilliant astrophysicist, was the lead scientist at CREA's headquarters on Luna. She had devoted her entire life to understanding the mysteries of the universe and had become a pioneer in her field. She was determined to uncover the secrets of the cosmic rifts, a series of mysterious and seemingly unconnected energy anomalies that had started appearing throughout the galaxy.

📊 Overall Paragraph Sentiment ➤ positive (Confidence: 0.86)
🎭 Overall Paragraph Emotion ➤ admiration (Confidence: 0.46)

✂ Phrase 1: Dr. Amelia Hart
   ➤ Sentiment: neutral (Confidence: 0.82)
   ➤ Emotion:   admiration (Confidence: 0.34)

✂ Phrase 2: a brilliant astrophysicist
   ➤ Sentiment: positive (Confidence: 0.95)
   ➤ Emotion:   admiration (Confidence: 0.85)

✂ Phrase 3: was the lead scientist at CREA's headquarters on Luna.
   ➤ Sentiment: neutral (Confidence: 0.93)
   ➤ Emotion:   neutral (